From eae85c32cad2c7883ce75e98447297697d5c0939 Mon Sep 17 00:00:00 2001 From: Arthur Barraux Date: Wed, 19 Nov 2025 10:37:41 +0100 Subject: [PATCH 1/2] add utf8_char_t struct --- include/append_buffer.h | 2 +- include/data.h | 51 ++++++- include/define.h | 15 +- include/editor_op.h | 4 +- include/input.h | 4 +- include/row_op.h | 2 +- include/terminal.h | 4 +- install.sh | 1 + main.c | 8 ++ src/append_buffer.c | 4 +- src/builtins.c | 49 +++++-- src/editor_op.c | 95 +++++++++---- src/file_io.c | 155 +++++++++++++++----- src/init.c | 2 +- src/input.c | 306 +++++++++++++++++++--------------------- src/output.c | 22 ++- src/row_op.c | 131 +++++++++++++---- src/terminal.c | 254 +++++++++++++++++++++++++-------- 18 files changed, 770 insertions(+), 339 deletions(-) diff --git a/include/append_buffer.h b/include/append_buffer.h index 3f28992..dc266e4 100644 --- a/include/append_buffer.h +++ b/include/append_buffer.h @@ -5,7 +5,7 @@ #include #include -void abAppend(struct abuf *ab, const char *s, int len); +void abAppend(struct abuf *ab, const unsigned char *s, int len); void abFree(struct abuf *ab); diff --git a/include/data.h b/include/data.h index 6eeaa4e..68b236a 100644 --- a/include/data.h +++ b/include/data.h @@ -7,6 +7,12 @@ #include "lisp.h" + +typedef struct{ + unsigned char c[4]; + char len; +} utf_8_char_t; + /** * \struct erow * \brief Store one editor row @@ -16,8 +22,8 @@ typedef struct erow { int size; /**< Size of the line */ int rsize; /**< Size of the render line */ - char *chars; /**< Characters of the line */ - char *render; /**< The actual line we will print */ + utf_8_char_t *chars; /**< Characters of the line */ + utf_8_char_t *render; /**< The actual line we will print */ } erow; enum editorStatus_e { @@ -31,10 +37,45 @@ struct const_t { int QUIT_TIMES; }; +// Key types +typedef enum { + KEY_CHAR, // Regular character or UTF-8 + KEY_CTRL, // Ctrl+letter + KEY_ALT, // Alt+letter + KEY_ARROW, // Arrow keys + KEY_FUNCTION, // Function keys + KEY_SPECIAL, // Tab, Enter, ESC, Backspace, etc. + KEY_NAVIGATION, // Home, End, PgUp, PgDn, Insert, Delete + KEY_UNKNOWN +} KeyType; + +// Modifiers +typedef enum { + MOD_NONE = 0, + MOD_SHIFT = 1, + MOD_ALT = 2, + MOD_CTRL = 4 +} KeyModifier; + +// Key information structure +typedef struct { + KeyType type; + int modifiers; // Bitmask of KeyModifier + union { + unsigned int codepoint; // For KEY_CHAR + char ctrl_char; // For KEY_CTRL (A-Z) + char alt_char; // For KEY_ALT + char arrow; // For KEY_ARROW (U/D/L/R) + int function_num; // For KEY_FUNCTION (1-12) + char special; // For KEY_SPECIAL and KEY_NAVIGATION + } data; + utf_8_char_t c; // Raw bytes +} KeyInfo; + + struct keyBind_t { - char *key_sequence; + KeyInfo *key_sequence; Lisp command; - }; /** @@ -77,7 +118,7 @@ struct editorConfig { * */ struct abuf { - char *b; /**< Text that will be printed */ + unsigned char *b; /**< Text that will be printed */ int len; /**< Length of the text */ }; diff --git a/include/define.h b/include/define.h index 3ff01e1..17c0ec9 100644 --- a/include/define.h +++ b/include/define.h @@ -8,19 +8,10 @@ #define HIDE_CURSOR "\x1b[?25l" #define SHOW_CURSOR "\x1b[?25h" #define ERASE_END_LINE "\x1b[K" +#define TAB "\x09" +#define SPACE "\x20" + -enum editorKey { - BACKSPACE = 127, - ARROW_LEFT = 1000, - ARROW_RIGHT, - ARROW_UP, - ARROW_DOWN, - DEL_KEY, - BEG_LINE, - END_LINE, - PAGE_UP, - PAGE_DOWN, -}; #define ABUF_INIT {NULL, 0} diff --git a/include/editor_op.h b/include/editor_op.h index 757b6c8..a67c7df 100644 --- a/include/editor_op.h +++ b/include/editor_op.h @@ -2,9 +2,9 @@ #define EDITOR_OP_H_ #include "data.h" -void editorInsertChar(int c); +void editorInsertChar(utf_8_char_t *c); -void editorInsertNewLine(); +void editorInsertNewLine(void); void editorDelChar(); diff --git a/include/input.h b/include/input.h index 2c031ef..8799904 100644 --- a/include/input.h +++ b/include/input.h @@ -24,9 +24,9 @@ char *editorPrompt(char *prompt, char * PlaceHolder, char bPathMode); char *key_to_string(int key); -void editorMoveCursor(int key); +void editorMoveCursor(KeyInfo * key); -int executeKeyBind(char *key_sequence); +int executeKeyBind(KeyInfo *key_sequence); /** * \fn void editorProcessKeypress() diff --git a/include/row_op.h b/include/row_op.h index 80973ad..fe350b9 100644 --- a/include/row_op.h +++ b/include/row_op.h @@ -20,7 +20,7 @@ void editorFreeRow(erow *row); void editorDelRow(int at); -void editorRowInsertChar(erow *row, int at, int c); +void editorRowInsertChar(erow *row, int at, utf_8_char_t c); void editorRowAppendString(erow *row, char *s, size_t len); diff --git a/include/terminal.h b/include/terminal.h index 6ab9108..d61a569 100644 --- a/include/terminal.h +++ b/include/terminal.h @@ -25,10 +25,12 @@ void disableRawMode(); void enableRawMode(); -int editorReadKey(); +KeyInfo * editorReadKey(); int getCursorPosition(int *rows, int *cols); +KeyInfo *stringToCodepoint(const char *string); + int getWindowSize(int *rows, int *cols); #endif diff --git a/install.sh b/install.sh index ddf59b0..3b869f0 100755 --- a/install.sh +++ b/install.sh @@ -17,6 +17,7 @@ fi echo "Create config files ..." mkdir -pv ~/.beluga/ cp -rv ./assets/ ~/.beluga/ +mkdir -pv ~/.beluga/config/ mkdir -pv ~/.beluga/packages/ read -p "Do you want to replace your config file or keep it (init.lisp.bak) / (init.lisp.new) ? (Y/n)" confirm diff --git a/main.c b/main.c index 4dd8492..2cd2716 100644 --- a/main.c +++ b/main.c @@ -21,11 +21,19 @@ #include "include/output.h" #include "include/terminal.h" +#include +#include + struct editorConfig E; int main(int argc, char *argv[]) { char * splash_screen = (char *) calloc(256, sizeof(char)); + + // Set support for utf-8 + setlocale(LC_ALL, ""); + + // INIT enableRawMode(); initEditor(); diff --git a/src/append_buffer.c b/src/append_buffer.c index 00e14a3..6afb831 100644 --- a/src/append_buffer.c +++ b/src/append_buffer.c @@ -2,8 +2,8 @@ extern struct editorConfig E; -void abAppend(struct abuf *ab, const char *s, int len) { - char *new = realloc(ab->b, ab->len + len); +void abAppend(struct abuf *ab, const unsigned char *s, int len) { + unsigned char *new = realloc(ab->b, ab->len + len); if (new == NULL) { return; diff --git a/src/builtins.c b/src/builtins.c index 0a1c7a5..b580197 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -5,22 +5,31 @@ #include "../include/editor_op.h" #include "../include/row_op.h" #include "../include/data.h" +#include "../include/terminal.h" #include #include #include +utf_8_char_t make_utf8_char(const char *bytes, int len) { + utf_8_char_t ch; + ch.len = len; + memcpy(ch.c, bytes, len); + return ch; +} + Lisp mapKey(Lisp args, LispError *e, LispContext ctx) { - const char *key_sequence = lisp_string(lisp_car(args)); + const char *key_string = lisp_string(lisp_car(args)); + KeyInfo *key = stringToCodepoint(key_string); args = lisp_cdr(args); // second argument Lisp func = lisp_car(args); E.key_binds = (struct keyBind_t *)realloc(E.key_binds, ++E.number_of_keybinds * sizeof(struct keyBind_t)); - E.key_binds[E.number_of_keybinds - 1].key_sequence = (char *) malloc(50 * sizeof(char)); + E.key_binds[E.number_of_keybinds - 1].key_sequence = (KeyInfo *) malloc(sizeof(KeyInfo)); - strncpy(E.key_binds[E.number_of_keybinds - 1].key_sequence, key_sequence, 50); + memcpy(E.key_binds[E.number_of_keybinds - 1].key_sequence, key, sizeof(KeyInfo)); E.key_binds[E.number_of_keybinds - 1].command = func; @@ -29,26 +38,30 @@ Lisp mapKey(Lisp args, LispError *e, LispContext ctx) { Lisp moveCursor(Lisp args, LispError *e, LispContext ctx) { const char *direction = lisp_string(lisp_car(args)); + KeyInfo key; + key.type = KEY_ARROW; switch (direction[0]) { case 'u': - editorMoveCursor(ARROW_UP); + key.data.arrow = 'A'; break; case 'd': - editorMoveCursor(ARROW_DOWN); + key.data.arrow = 'B'; break; case 'r': - editorMoveCursor(ARROW_RIGHT); + key.data.arrow = 'C'; break; case 'l': - editorMoveCursor(ARROW_LEFT); + key.data.arrow = 'D'; break; } + editorMoveCursor(&key); return lisp_null(); } Lisp editorQuit(Lisp args, LispError* e, LispContext ctx) { + fprintf(stderr, "quit\n"); if (E.dirty && E.quit_times_buffer > 0) { editorSetStatusMessage("WARNING! Changes hasn't been saved. Press Ctrl-Q " "another time to quit."); @@ -77,7 +90,7 @@ Lisp l_editorSave(Lisp args, LispError* e, LispContext ctx) { Lisp l_editorInsertNewLine(Lisp args, LispError* e, LispContext ctx) { - editorInsertNewLine(); + // editorInsertNewLine(); return lisp_null(); @@ -105,8 +118,11 @@ Lisp deletePreviousChar(Lisp args, LispError* e, LispContext ctx) { Lisp editorMoveCursorPageUp(Lisp args, LispError* e, LispContext ctx) { E.cursor_y = E.row_offset; int times = E.screenrows; + KeyInfo key; + key.type = KEY_ARROW; + key.data.arrow = 'D'; while (--times) { - editorMoveCursor(ARROW_UP); + editorMoveCursor(&key); } return lisp_null(); } @@ -117,8 +133,11 @@ Lisp editorMoveCursorPageDown(Lisp args, LispError* e, LispContext ctx) { E.cursor_y = E.numrows; } int times = E.screenrows; + KeyInfo key; + key.type = KEY_ARROW; + key.data.arrow = 'D'; while (--times) { - editorMoveCursor(ARROW_DOWN); + editorMoveCursor(&key); } return lisp_null(); @@ -134,8 +153,9 @@ Lisp editorOpenFile(Lisp args, LispError *e, LispContext ctx) { Lisp editorPrintC(Lisp args, LispError *e, LispContext ctx) { - char c = lisp_string(lisp_car(args))[0]; - editorInsertChar(c); + char *c = lisp_string(lisp_car(args)); + utf_8_char_t ch = make_utf8_char(c, 1); + editorInsertChar(&ch); return lisp_null(); } @@ -168,7 +188,8 @@ Lisp editorFind_L(Lisp args, LispError *e, LispContext ctx) { } Lisp editorReadChar_L(Lisp args, LispError *e, LispContext ctx) { - fprintf(stderr, "char read : %c\n", E.row[E.cursor_y].render[E.cursor_x]); - return lisp_make_char(E.row[E.cursor_y].render[E.cursor_x]); + // fprintf(stderr, "char read : %c\n", E.row[E.cursor_y].render[E.cursor_x]); + // return lisp_make_char(E.row[E.cursor_y].render[E.cursor_x]); + return lisp_null(); } diff --git a/src/editor_op.c b/src/editor_op.c index 3506ffd..cbc5921 100644 --- a/src/editor_op.c +++ b/src/editor_op.c @@ -1,47 +1,94 @@ #include "../include/editor_op.h" #include "../include/row_op.h" +#include "include/data.h" +#include extern struct editorConfig E; -void editorInsertChar(int c) { - if (E.cursor_y == E.numrows) { - editorInsertRow(E.numrows, "", 0); - } - editorRowInsertChar(&E.row[E.cursor_y], E.cursor_x, c); - E.cursor_x++; +void editorInsertChar(utf_8_char_t *c) { + if (E.state == READ_ONLY) return; + fprintf(stderr, "Insert char %s %d\n", c->c, c->len); + // If cursor is past end of file, add empty rows + if (E.cursor_y == E.numrows) { + editorInsertRow(E.numrows, "", 0); + } + + // Insert character at cursor position + editorRowInsertChar(&E.row[E.cursor_y], E.cursor_x, *c); + E.cursor_x++; } -void editorInsertNewLine() { - erow *row; - if (!E.cursor_x) { +void editorInsertNewline(void) { + if (E.state == READ_ONLY) return; + + if (E.cursor_x == 0) { + // Insert blank line before current line editorInsertRow(E.cursor_y, "", 0); } else { - row = &E.row[E.cursor_y]; - editorInsertRow(E.cursor_y + 1, &row->chars[E.cursor_x], - row->size - E.cursor_x); - row = &E.row[E.cursor_y]; + // Split current line at cursor + erow *row = &E.row[E.cursor_y]; + + // Calculate byte length of remaining part + int remaining_chars = row->size - E.cursor_x; + + // Allocate buffer for remaining characters + char *buf = malloc(remaining_chars * 4); // Max 4 bytes per UTF-8 char + int buf_len = 0; + + // Convert utf_8_char_t to bytes + for (int i = E.cursor_x; i < row->size; i++) { + for (int j = 0; j < row->chars[i].len; j++) { + buf[buf_len++] = row->chars[i].c[j]; + } + } + + // Insert new row with remaining text + editorInsertRow(E.cursor_y + 1, buf, buf_len); + free(buf); + + // Truncate current row at cursor + row = &E.row[E.cursor_y]; // Refresh pointer after realloc row->size = E.cursor_x; - row->chars[row->size] = '\0'; editorUpdateRow(row); } - ++E.cursor_y; + + E.cursor_y++; E.cursor_x = 0; } -void editorDelChar() { - erow *row; - if (E.cursor_y == E.numrows || !(E.cursor_x || E.cursor_y)) { - return; - } - row = &E.row[E.cursor_y]; +void editorRowAppendRow(erow *dest, erow *src) { + // Allocate space for combined rows + utf_8_char_t *new_chars = realloc(dest->chars, + sizeof(utf_8_char_t) * (dest->size + src->size)); + if (!new_chars) return; + + dest->chars = new_chars; + + // Copy source row characters + memcpy(&dest->chars[dest->size], src->chars, sizeof(utf_8_char_t) * src->size); + dest->size += src->size; + + editorUpdateRow(dest); + ++E.dirty; +} + +void editorDelChar(void) { + if (E.state == READ_ONLY) return; + if (E.cursor_y == E.numrows) return; + if (E.cursor_x == 0 && E.cursor_y == 0) return; + + erow *row = &E.row[E.cursor_y]; + if (E.cursor_x > 0) { + // Delete character before cursor editorRowDelchar(row, E.cursor_x - 1); - --E.cursor_x; + E.cursor_x--; } else { + // At beginning of line - join with previous line E.cursor_x = E.row[E.cursor_y - 1].size; - editorRowAppendString(&E.row[E.cursor_y - 1], row->chars, row->size); + editorRowAppendRow(&E.row[E.cursor_y - 1], row); editorDelRow(E.cursor_y); - --E.cursor_y; + E.cursor_y--; } } diff --git a/src/file_io.c b/src/file_io.c index a0fb3a7..811d01a 100644 --- a/src/file_io.c +++ b/src/file_io.c @@ -7,6 +7,7 @@ #include #include #include +#include extern char *strdup(const char *); extern ssize_t getline(char **restrict lineptr, size_t *restrict n, @@ -14,37 +15,56 @@ extern ssize_t getline(char **restrict lineptr, size_t *restrict n, extern int ftruncate(int fd, off_t length); extern struct editorConfig E; +// Convert utf_8_char_t array to byte string char *editorRowsToString(int *buffer_len) { int tot_len = 0; - int j; + int j, i; char *buf; char *p; + // Calculate total byte length (not character count) for (j = 0; j < E.numrows; ++j) { - tot_len += E.row[j].size + 1; + // Count actual bytes in each character + for (i = 0; i < E.row[j].size; i++) { + tot_len += E.row[j].chars[i].len; + } + tot_len++; // For newline } + *buffer_len = tot_len; buf = malloc(tot_len); + if (!buf) return NULL; + p = buf; for (j = 0; j < E.numrows; ++j) { - memcpy(p, E.row[j].chars, E.row[j].size); - p += E.row[j].size; - *p = '\n'; - p++; + // Copy each character's bytes + for (i = 0; i < E.row[j].size; i++) { + for (int k = 0; k < E.row[j].chars[i].len; k++) { + *p++ = E.row[j].chars[i].c[k]; + } + } + *p++ = '\n'; } return buf; } void editorCloseFile(void) { + // Free all rows + for (int i = 0; i < E.numrows; i++) { + editorFreeRow(&E.row[i]); + } + E.cursor_x = 0; E.cursor_y = 0; E.rx = 0; E.row_offset = 0; E.col_offset = 0; E.numrows = 0; + free(E.row); E.row = NULL; E.dirty = 0; + free(E.filename); E.filename = NULL; E.status_msg[0] = '\0'; E.status_msg_time = 0; @@ -56,25 +76,29 @@ void editorOpen(char *filename) { // Test if a file is already open if (E.filename != NULL) { editorCloseFile(); - E.state = READ_AND_WRITE; } + E.state = READ_AND_WRITE; - free(E.filename); E.filename = strdup(filename); - fp = fopen(filename, "a+"); - if (!fp) - die("fopen"); + fp = fopen(filename, "r"); + if (!fp) { + // File doesn't exist - that's okay, we'll create it on save + E.dirty = 0; + return; + } char *line = NULL; size_t line_cap = 0; ssize_t line_len; while ((line_len = getline(&line, &line_cap, fp)) != -1) { + // Strip newline characters while (line_len > 0 && (line[line_len - 1] == '\n' || line[line_len - 1] == '\r')) { --line_len; } + // editorInsertRow will convert bytes to utf_8_char_t editorInsertRow(E.numrows, line, line_len); } free(line); @@ -86,6 +110,7 @@ void editorSave() { int len; char *buf; int fd; + if (E.filename == NULL) { E.filename = editorPrompt("Save as: %s (ESC to cancel)", "", 1); if (E.filename == NULL) { @@ -93,38 +118,100 @@ void editorSave() { return; } } + buf = editorRowsToString(&len); - fd = open(E.filename, O_RDWR | O_CREAT, 0644); + if (!buf) { + editorSetStatusMessage("Can't save! Memory error"); + return; + } + + fd = open(E.filename, O_RDWR | O_CREAT | O_TRUNC, 0644); if (fd != -1) { - if (ftruncate(fd, len) != -1) { - if (write(fd, buf, len) == len) { - close(fd); - free(buf); - E.dirty = 0; - editorSetStatusMessage("%d bytes written to disk", len); - return; - } + if (write(fd, buf, len) == len) { + close(fd); + free(buf); + E.dirty = 0; + editorSetStatusMessage("%d bytes written to disk", len); + return; } close(fd); } + free(buf); editorSetStatusMessage("Can't save! I/O error: %s", strerror(errno)); } -void editorFind() { - char *query = editorPrompt("Search: %s (ESC to cancel)", "", 0); - if (query == NULL) return; - int i; - for (i = E.cursor_y + 1; i < E.numrows; i++) { - erow *row = &E.row[i]; - char *match = strstr(row->render, query); - if (match) { - E.cursor_y = i; - E.cursor_x = editorRowRxToCx(row, match - row->render); - E.row_offset = E.numrows; - break; +// Helper to convert utf_8_char_t array to byte string for searching +static char *row_to_string(erow *row) { + // Calculate byte length + int byte_len = 0; + for (int i = 0; i < row->rsize; i++) { + byte_len += row->render[i].len; } - } - free(query); + + char *str = malloc(byte_len + 1); + if (!str) return NULL; + + // Convert to bytes + int pos = 0; + for (int i = 0; i < row->rsize; i++) { + for (int j = 0; j < row->render[i].len; j++) { + str[pos++] = row->render[i].c[j]; + } + } + str[pos] = '\0'; + + return str; } +void editorFind() { + char *query = editorPrompt("Search: %s (ESC to cancel)", "", 0); + if (query == NULL) return; + + int saved_cursor_x = E.cursor_x; + int saved_cursor_y = E.cursor_y; + int saved_row_offset = E.row_offset; + int saved_col_offset = E.col_offset; + + // Search from current position forward + for (int i = E.cursor_y; i < E.numrows; i++) { + erow *row = &E.row[i]; + + // Convert row to byte string for searching + char *render_str = row_to_string(row); + if (!render_str) continue; + + char *match = strstr(render_str, query); + if (match) { + E.cursor_y = i; + + // Find the character index from byte position + int byte_pos = match - render_str; + int char_idx = 0; + int current_byte = 0; + + for (char_idx = 0; char_idx < row->rsize; char_idx++) { + if (current_byte >= byte_pos) break; + current_byte += row->render[char_idx].len; + } + + E.cursor_x = editorRowRxToCx(row, char_idx); + E.row_offset = E.numrows; // Force scroll + + free(render_str); + free(query); + return; + } + + free(render_str); + } + + // Not found - restore cursor position + E.cursor_x = saved_cursor_x; + E.cursor_y = saved_cursor_y; + E.row_offset = saved_row_offset; + E.col_offset = saved_col_offset; + + editorSetStatusMessage("Not found: %s", query); + free(query); +} diff --git a/src/init.c b/src/init.c index 00df766..a0e5ac0 100644 --- a/src/init.c +++ b/src/init.c @@ -86,6 +86,6 @@ void initEditor() { (int)lisp_eval(lisp_read("QUIT-TIMES", &E.ctx_error, E.ctx), &E.ctx_error, E.ctx) .val.int_val; - + fprintf(stderr, "Tab %d\n", E.constantes.QUIT_TIMES); E.quit_times_buffer = E.constantes.QUIT_TIMES; } diff --git a/src/input.c b/src/input.c index b1bdbca..0632fff 100644 --- a/src/input.c +++ b/src/input.c @@ -1,200 +1,147 @@ #include "../include/input.h" +#include "../include/define.h" #include "../include/editor_op.h" #include "../include/output.h" -#include "../include/define.h" +#include "include/data.h" #include -#include #include #include #include #include #include +#include #include extern struct editorConfig E; -char * file_completion(const char *path) { - DIR * dir; - struct dirent *entry; - char directory[128]; - char predict[128]; - int predict_len = 0; +char *file_completion(const char *path) { + DIR *dir; + struct dirent *entry; + char directory[128]; + char predict[128]; + int predict_len = 0; if (path[strlen(path) - 1] == '/') { - return path; + return strdup(path); } - // Find dir name - char * last_slash = strrchr(path, '/'); - if (last_slash) { - size_t dir_len = last_slash - path + 1; // length of dir_path - strncpy(directory, path, dir_len); - predict_len = strlen(path) - dir_len - 1; - strncpy(predict, last_slash + 1, predict_len); - directory[dir_len] = '\0'; - predict[predict_len] = '\0'; - fprintf(stderr, "%s %s\n", directory, predict); - } else { - return NULL; - } - + // Find dir name + char *last_slash = strrchr(path, '/'); + if (last_slash) { + size_t dir_len = last_slash - path + 1; + strncpy(directory, path, dir_len); + predict_len = strlen(path) - dir_len; + strncpy(predict, last_slash + 1, predict_len); + directory[dir_len] = '\0'; + predict[predict_len] = '\0'; + } else { + return NULL; + } dir = opendir(directory); if (!dir) - return NULL; - - while ((entry = readdir(dir)) != NULL) { - if (strncmp(entry->d_name, predict, predict_len) == 0) { - static char full_path[128]; - snprintf(full_path, sizeof(full_path), "%s%s", directory, entry->d_name); - - struct stat st; - if (stat(full_path, &st) == 0 && S_ISDIR(st.st_mode)) { - strcat(full_path, "/"); // add slash for directories - } - - return strdup(full_path); - } - } - - // Cleanup when no more entries - closedir(dir); - dir = NULL; return NULL; + while ((entry = readdir(dir)) != NULL) { + if (strncmp(entry->d_name, predict, predict_len) == 0) { + static char full_path[128]; + snprintf(full_path, sizeof(full_path), "%s%s", directory, entry->d_name); + + struct stat st; + if (stat(full_path, &st) == 0 && S_ISDIR(st.st_mode)) { + strcat(full_path, "/"); + } + closedir(dir); + return strdup(full_path); + } + } + + closedir(dir); + return NULL; } /** * \fn char * editorPrompt(struct editorConfig *E, char *prompt, char bPathMode) * \brief Return user input in a prompt when enter is hit. */ -char *editorPrompt(char *prompt, char * placeHolder, char bPathMode) { +char *editorPrompt(char *prompt, char *placeHolder, char bPathMode) { size_t buf_size = 128; char *buf = malloc(buf_size); size_t buf_len = 0; - int c = 0; buf[0] = '\0'; - strcpy(buf, placeHolder); - buf_len = strlen(placeHolder); + strcpy(buf, placeHolder); + buf_len = strlen(placeHolder); while (1) { - editorSetStatusMessage(prompt, buf); + editorSetStatusMessage(prompt, buf); editorRefreshScreen(); - c = editorReadKey(); - if (c == DEL_KEY || c == CTRL_KEY('h') || c == BACKSPACE) { + + KeyInfo *key = editorReadKey(); + + // Handle backspace/delete + if (key->type == KEY_SPECIAL && (key->data.special == 127 || key->data.special == 8)) { if (buf_len != 0) { buf[--buf_len] = '\0'; } - } else if (c == ESCAPE) { + } + // Handle Ctrl+H (backspace) + else if (key->type == KEY_CTRL && key->data.ctrl_char == 'H') { + if (buf_len != 0) { + buf[--buf_len] = '\0'; + } + } + // Handle ESC + else if (key->type == KEY_SPECIAL && key->data.special == 27) { editorSetStatusMessage(""); free(buf); return NULL; - } else if (c == '\r') { + } + // Handle Enter + else if (key->type == KEY_SPECIAL && (key->data.special == 13 || key->data.special == 10)) { if (buf_len != 0) { editorSetStatusMessage(""); return buf; } - } else if (bPathMode && c == '\t') { - char path[128]; - char * pwd; - if (buf[0] != '/') { - pwd = getenv("PWD"); - fprintf(stderr, "%s\n", pwd); - memcpy(path, pwd, strlen(pwd)); - path[strlen(pwd)] = '/'; - strncat(path, buf, buf_len); - } else { - strcpy(path, buf); - } - memset(buf, 0, 128); - buf_len = 0; - strcpy(buf, file_completion(path)); - buf_len = strlen(buf); - buf[buf_len] = '\0'; - - } else if (!iscntrl(c) && c < 128) { + } + // Handle Tab for path completion + else if (bPathMode && key->type == KEY_SPECIAL && key->data.special == 9) { + char path[128]; + char *pwd; + if (buf[0] != '/') { + pwd = getenv("PWD"); + snprintf(path, sizeof(path), "%s/%s", pwd, buf); + } else { + strcpy(path, buf); + } + + char *completion = file_completion(path); + if (completion) { + memset(buf, 0, buf_size); + strcpy(buf, completion); + buf_len = strlen(buf); + free(completion); + } + } + // Handle regular characters (ASCII only for prompts) + else if (key->type == KEY_CHAR && key->data.codepoint < 128) { if (buf_len == buf_size - 1) { buf_size *= 2; buf = realloc(buf, buf_size); } - buf[buf_len++] = c; + buf[buf_len++] = (char)key->data.codepoint; buf[buf_len] = '\0'; } } } -char *key_to_string(int key) { - static char key_str[32]; - - char tmp[10]; - sprintf(tmp, "%d", key); - - - // First test enter key - - if (key == '\r') { - strcpy(key_str, "ENTER"); - } else if (key >= 1 && key <= 26) { // CTRL keys - snprintf(key_str, sizeof(key_str), "CTRL-%c", 'a' + key - 1); - } else { - switch (key) { - case ARROW_UP: - strcpy(key_str, "ARROW-UP"); - break; - case ARROW_DOWN: - strcpy(key_str, "ARROW-DOWN"); - break; - case ARROW_LEFT: - strcpy(key_str, "ARROW-LEFT"); - break; - case ARROW_RIGHT: - strcpy(key_str, "ARROW-RIGHT"); - break; - case PAGE_UP: - strcpy(key_str, "PAGE-UP"); - fprintf(stderr, "pagr up\n"); - break; - case PAGE_DOWN: - strcpy(key_str, "PAGE-DOWN"); - break; - case DEL_KEY: - fprintf(stderr, "delete key\n"); - strcpy(key_str, "DEL"); - - break; - case BACKSPACE: - strcpy(key_str, "BACKSPACE"); - break; - case '\r': - strcpy(key_str, "ENTER"); - break; - case '\x1b': - strcpy(key_str, "ESCAPE"); - break; - case BEG_LINE: - strcpy(key_str, "HOME"); - break; - case END_LINE: - strcpy(key_str, "END"); - break; - default: - // For regular characters - if (isprint(key)) { - snprintf(key_str, sizeof(key_str), "%c", key); - } else { - snprintf(key_str, sizeof(key_str), "KEY-%d", key); - } - } - } - return key_str; -} - - -void editorMoveCursor(int key) { +void editorMoveCursor(KeyInfo *key) { + if (key->type != KEY_ARROW) return; + erow *row = (E.cursor_y >= E.numrows) ? NULL : &E.row[E.cursor_y]; int row_len; - switch (key) { - case ARROW_RIGHT: + + switch (key->data.arrow) { + case 'C': // Right if (row && E.cursor_x < row->size) { ++E.cursor_x; } else if (row && E.cursor_x == row->size) { @@ -202,17 +149,17 @@ void editorMoveCursor(int key) { E.cursor_x = 0; } break; - case ARROW_DOWN: + case 'B': // Down if (E.cursor_y < E.numrows) { ++E.cursor_y; } break; - case ARROW_UP: + case 'A': // Up if (E.cursor_y != 0) { --E.cursor_y; } break; - case ARROW_LEFT: + case 'D': // Left if (E.cursor_x != 0) { --E.cursor_x; } else if (E.cursor_y > 0) { @@ -229,28 +176,73 @@ void editorMoveCursor(int key) { } } -int executeKeyBind(char *key_sequence) { - int i; - for (i = 0; i < E.number_of_keybinds; ++i) { - if (!strcmp(key_sequence, E.key_binds[i].key_sequence)) { +KeyInfo *stringToCodepoint(const char *string) { + KeyInfo *key = (KeyInfo *)malloc(sizeof(KeyInfo)); + // test control key + if (!strncmp("CTRL", string, 4)) { + key->type = KEY_CTRL; + key->data.ctrl_char = toupper(string[6]) + 64; + } else if (!strncmp("ARROW", string, 5)) { + key->type = KEY_ARROW; + if (!strcmp("UP", string + 7)) { + key->data.arrow = 'A'; + } else if (!strcmp("DOWN", string + 7)) { + key->data.arrow = 'B'; + } else if (!strcmp("RIGHT", string + 7)) { + key->data.arrow = 'C'; + } else if (!strcmp("LEFT", string + 7)) { + key->data.arrow = 'D'; + } + } + + return key; +} + +static int key_match(KeyInfo *a, KeyInfo *b) { + if (a->type != b->type) return 0; + if (a->modifiers != b->modifiers) return 0; - fprintf(stderr, "lisp function %s\n", key_sequence); - // It's a symbol, create a function call - lisp_eval(lisp_cons(E.key_binds[i].command, lisp_null(), E.ctx), - &E.ctx_error, E.ctx); - return 1; + switch (a->type) { + case KEY_CTRL: + return toupper(a->data.ctrl_char) == toupper(b->data.ctrl_char); + case KEY_ALT: + return a->data.alt_char == b->data.alt_char; + case KEY_ARROW: + return a->data.arrow == b->data.arrow; + case KEY_FUNCTION: + return a->data.function_num == b->data.function_num; + case KEY_CHAR: + return a->data.codepoint == b->data.codepoint; + case KEY_SPECIAL: + case KEY_NAVIGATION: + return a->data.special == b->data.special; + default: + return 0; + } +} + +int executeKeyBind(KeyInfo *key_sequence) { + for (int i = 0; i < E.number_of_keybinds; ++i) { + fprintf(stderr, "Keybind found\n"); + if (key_match(key_sequence, E.key_binds[i].key_sequence)) { + // Execute the lisp command + lisp_eval(lisp_cons(E.key_binds[i].command, lisp_null(), E.ctx), + &E.ctx_error, E.ctx); + return 1; } } return 0; } void editorProcessKeypress() { - int c = editorReadKey(); + KeyInfo *key = editorReadKey(); + if (!key) + return; - if (executeKeyBind(key_to_string(c))) { + if (executeKeyBind(key)) { + fprintf(stderr, "Keybinds found\n"); return; } - editorInsertChar(c); + editorInsertChar(&key->c); E.quit_times_buffer = E.constantes.QUIT_TIMES; - } diff --git a/src/output.c b/src/output.c index a8a896a..b337628 100644 --- a/src/output.c +++ b/src/output.c @@ -6,6 +6,17 @@ extern struct editorConfig E; +static void utf8_to_bytes(utf_8_char_t *chars, int count, unsigned char *output, int *output_len) { + int pos = 0; + for (int i = 0; i < count; i++) { + for (int j = 0; j < chars[i].len; j++) { + output[pos++] = chars[i].c[j]; + } + fprintf(stderr, "bytes length : %s %d\n", chars[i].c, pos); + } + *output_len = pos; +} + void editorDrawRows(struct abuf *ab) { int y; char welcome[80]; @@ -41,7 +52,16 @@ void editorDrawRows(struct abuf *ab) { len = 0; if (len > E.screencols) len = E.screencols; - abAppend(ab, &E.row[file_row].render[E.col_offset], len); + if (len > 0) { + unsigned char *display_buf = malloc(len * 4); // Max 4 bytes per char + int byte_len; + + utf8_to_bytes(&E.row[file_row].render[E.col_offset], len, display_buf, + &byte_len); + abAppend(ab, display_buf, byte_len); + fprintf(stderr, "display buffer : %s %d\n", display_buf, byte_len); + free(display_buf); + } } abAppend(ab, ERASE_END_LINE, 3); abAppend(ab, "\r\n", 2); diff --git a/src/row_op.c b/src/row_op.c index 29bd22d..1650f68 100644 --- a/src/row_op.c +++ b/src/row_op.c @@ -1,4 +1,6 @@ #include "../include/row_op.h" +#include "include/data.h" +#include "include/define.h" #include #include #include @@ -6,11 +8,29 @@ extern struct editorConfig E; +static int is_tab(utf_8_char_t *ch) { + return ch->len == 1 && ch->c[0] == '\t'; +} + +// Helper function to check if two utf_8_char_t are equal +static int utf8_char_equal(utf_8_char_t *a, utf_8_char_t *b) { + if (a->len != b->len) return 0; + return memcmp(a->c, b->c, a->len) == 0; +} + +// Helper function to create a space character +static utf_8_char_t make_space() { + utf_8_char_t space; + space.c[0] = ' '; + space.len = 1; + return space; +} + int editorRowCxToRx(erow *row, int cursor_x) { int render_x = 0; int i; for (i = 0; i < cursor_x; ++i) { - if (row->chars[i] == '\t') { + if (is_tab(&row->chars[i])) { render_x += (E.constantes.TAB_LENGTH - 1) - (render_x % E.constantes.TAB_LENGTH); } render_x++; @@ -22,7 +42,7 @@ int editorRowRxToCx(erow *row, int rx) { int cur_rx = 0; int cx; for (cx = 0; cx < row->size; cx++) { - if (row->chars[cx] == '\t') + if (is_tab(&row->chars[cx])) cur_rx += (E.constantes.TAB_LENGTH - 1) - (cur_rx % E.constantes.TAB_LENGTH); cur_rx++; if (cur_rx > rx) return cx; @@ -39,40 +59,42 @@ void editorUpdateRow(erow *row) { int i, i_render; int tabs = 0; - // counting number of tabs - + // Count number of tabs for (i = 0; i < row->size; ++i) { - tabs += - (row->chars[i] == '\t'); /**< increment tabs of 1 if chars[i] is one. */ + if (is_tab(&row->chars[i])) { + tabs++; + } } free(row->render); - row->render = malloc(row->size + tabs * (E.constantes.TAB_LENGTH - 1) + - 1); /**< Tabs needs E.constantes.TAB_LENGTH chars so E.constantes.TAB_LENGTH - 1 - more than the first already counted. */ + // Allocate space for utf_8_char_t array + row->render = malloc(sizeof(utf_8_char_t) * (row->size + tabs * (E.constantes.TAB_LENGTH - 1))); + + if (!row->render) { + row->rsize = 0; + return; + } - // end of counting i_render = 0; for (i = 0; i < row->size; ++i) { - if (row->chars[i] == '\t') { - row->render[i_render++] = ' '; + if (is_tab(&row->chars[i])) { + // Replace tab with spaces + row->render[i_render++] = make_space(); while (i_render % E.constantes.TAB_LENGTH) { - row->render[i_render++] = - ' '; /**< Addind the right amount of spaces for tabs */ + row->render[i_render++] = make_space(); } } else { row->render[i_render++] = row->chars[i]; } } - row->render[i_render] = '\0'; // Don't forget the end of string character. row->rsize = i_render; } void editorInsertRow(int at, char *s, size_t len) { if (at < 0 || at > E.numrows) { - return; } + erow *tmp = (erow *)realloc(E.row, sizeof(erow) * (E.numrows + 1)); if (!tmp) { return; @@ -80,19 +102,78 @@ void editorInsertRow(int at, char *s, size_t len) { E.row = tmp; memmove(&E.row[at + 1], &E.row[at], sizeof(erow) * (E.numrows - at)); - E.row[at].size = len; - E.row[at].chars = malloc(len + 1); - memcpy(E.row[at].chars, s, len); - E.row[at].chars[len] = '\0'; - + // Initialize the new row + E.row[at].size = 0; + E.row[at].chars = NULL; E.row[at].rsize = 0; E.row[at].render = NULL; + + // Count UTF-8 characters first + int char_count = 0; + int i = 0; + while (i < len) { + unsigned char first = (unsigned char)s[i]; + int char_len; + + if ((first & 0x80) == 0) { + char_len = 1; + } else if ((first & 0xE0) == 0xC0) { + char_len = 2; + } else if ((first & 0xF0) == 0xE0) { + char_len = 3; + } else if ((first & 0xF8) == 0xF0) { + char_len = 4; + } else { + char_len = 1; // Invalid, treat as single byte + } + + i += char_len; + char_count++; + } + + // Allocate for the actual number of characters + if (char_count > 0) { + E.row[at].chars = malloc(sizeof(utf_8_char_t) * char_count); + if (!E.row[at].chars) { + return; + } + } + + // Now convert to utf_8_char_t array + i = 0; + E.row[at].size = 0; + while (i < len && E.row[at].size < char_count) { + utf_8_char_t ch; + + unsigned char first = (unsigned char)s[i]; + if ((first & 0x80) == 0) { + ch.len = 1; + } else if ((first & 0xE0) == 0xC0) { + ch.len = 2; + } else if ((first & 0xF0) == 0xE0) { + ch.len = 3; + } else if ((first & 0xF8) == 0xF0) { + ch.len = 4; + } else { + ch.len = 1; + } + + // Copy bytes + for (int j = 0; j < ch.len && i < len; j++) { + ch.c[j] = s[i++]; + } + + E.row[at].chars[E.row[at].size++] = ch; + } + editorUpdateRow(&E.row[at]); + ++E.numrows; ++E.dirty; } + void editorFreeRow(erow *row) { free(row->render); free(row->chars); @@ -112,16 +193,17 @@ void editorDelRow(int at) { * \fn editorRowInsertChar(erow *row, int at, int c) * \param at Index of where we want to insert the char */ -void editorRowInsertChar(erow *row, int at, int c) { +void editorRowInsertChar(erow *row, int at, utf_8_char_t c) { if (E.state == READ_ONLY) return; if (at < 0 || at > row->size) { at = row->size; } - row->chars = realloc(row->chars, row->size + 2); + row->chars = realloc(row->chars, row->size + 1); memmove(&row->chars[at + 1], &row->chars[at], row->size - at + 1); - ++row->size; + ++(row->size); row->chars[at] = c; + fprintf(stderr, "Row insert : %s %d\n", c.c, c.len); editorUpdateRow(row); ++E.dirty; } @@ -130,7 +212,6 @@ void editorRowAppendString(erow *row, char *s, size_t len) { row->chars = realloc(row->chars, row->size + len + 1); memcpy(&row->chars[row->size], s, len); row->size += len; - row->chars[row->size] = '\0'; editorUpdateRow(row); ++E.dirty; } diff --git a/src/terminal.c b/src/terminal.c index 521958a..29626ba 100644 --- a/src/terminal.c +++ b/src/terminal.c @@ -2,6 +2,8 @@ #include "../include/data.h" #include +#include +#include void die(const char *s) { write(STDOUT_FILENO, "\x1b[2J", 4); @@ -35,73 +37,211 @@ void enableRawMode() { } } -int editorReadKey() { - int nread; - char c; - char seq[3]; - while ((nread = read(STDIN_FILENO, &c, 1)) != 1) { - if (nread == -1 && errno != EAGAIN) { - die("read"); +int utf8_char_length(unsigned char first_byte) { + if ((first_byte & 0x80) == 0) + return 1; // 0xxxxxxx - ASCII + if ((first_byte & 0xE0) == 0xC0) + return 2; // 110xxxxx - 2 bytes + if ((first_byte & 0xF0) == 0xE0) + return 3; // 1110xxxx - 3 bytes + if ((first_byte & 0xF8) == 0xF0) + return 4; // 11110xxx - 4 bytes + return 1; // Invalid, treat as single byte +} + +// Convert UTF-8 to Unicode code point +unsigned int utf8_to_codepoint(const unsigned char *bytes, int len) { + if (len == 1) + return bytes[0]; + if (len == 2) + return ((bytes[0] & 0x1F) << 6) | (bytes[1] & 0x3F); + if (len == 3) + return ((bytes[0] & 0x0F) << 12) | ((bytes[1] & 0x3F) << 6) | + (bytes[2] & 0x3F); + if (len == 4) + return ((bytes[0] & 0x07) << 18) | ((bytes[1] & 0x3F) << 12) | + ((bytes[2] & 0x3F) << 6) | (bytes[3] & 0x3F); + return 0; +} + +void parse_key(unsigned char *seq, int len, KeyInfo *key) { + memcpy(key->c.c, seq, len); + key->c.len = len; + key->modifiers = MOD_NONE; + key->type = KEY_UNKNOWN; + + // Control characters (Ctrl+A to Ctrl+Z) + if (len == 1 && seq[0] < 32 && seq[0] != 27 && seq[0] != 9 && seq[0] != 10 && + seq[0] != 13) { + key->type = KEY_CTRL; + key->data.ctrl_char = seq[0] + 64; + return; + } + + // Special single characters + if (len == 1) { + switch (seq[0]) { + case 9: + case 10: + case 13: + case 27: + case 127: + key->type = KEY_SPECIAL; + key->data.special = seq[0]; + return; } } - if (c == '\x1b') { - if (read(STDIN_FILENO, &seq[0], 1) != 1 || - read(STDIN_FILENO, &seq[1], 1) != 1) { - return '\x1b'; + // Escape sequences + if (len >= 2 && seq[0] == 27) { + // Alt+key combinations + if (len == 2 && seq[1] >= 32 && seq[1] < 127) { + key->type = KEY_ALT; + key->data.alt_char = seq[1]; + return; } - if (seq[0] == '[') { - if (seq[1] >= '0' && seq[1] <= '9') { - if (read(STDIN_FILENO, &seq[2], 1) != 1) { - return '\x1b'; + + // CSI sequences (ESC [ ...) + if (len >= 3 && seq[1] == '[') { + // Arrow keys + if (len == 3) { + switch (seq[2]) { + case 'A': + case 'B': + case 'C': + case 'D': + key->type = KEY_ARROW; + key->data.arrow = seq[2]; + return; + case 'H': + case 'F': + key->type = KEY_NAVIGATION; + key->data.special = seq[2]; + return; } - if (seq[2] == '~') { - switch (seq[1]) { - case '1': - return BEG_LINE; - case '3': - return DEL_KEY; - case '4': - return END_LINE; - case '5': - return PAGE_UP; - case '6': - return PAGE_DOWN; - case '7': - return BEG_LINE; - case '8': - return END_LINE; + } + + // Modified keys (ESC [ 1 ; modifier letter) + if (len >= 6 && seq[2] == '1' && seq[3] == ';') { + int modifier = seq[4] - '0'; + char k = seq[5]; + + if (modifier & 1) + key->modifiers |= MOD_SHIFT; + if (modifier & 2) + key->modifiers |= MOD_ALT; + if (modifier & 4) + key->modifiers |= MOD_CTRL; + + switch (k) { + case 'A': + case 'B': + case 'C': + case 'D': + key->type = KEY_ARROW; + key->data.arrow = k; + return; + case 'H': + case 'F': + key->type = KEY_NAVIGATION; + key->data.special = k; + return; + } + } + + // Function keys and navigation + if (len == 4 && seq[3] == '~') { + int num = seq[2] - '0'; + if (num >= 1 && num <= 6) { + key->type = KEY_NAVIGATION; + key->data.special = seq[2]; + return; + } + } + + if (len == 5 && seq[4] == '~') { + int num = (seq[2] - '0') * 10 + (seq[3] - '0'); + if (num >= 15 && num <= 24) { + key->type = KEY_FUNCTION; + // Map to F5-F12 + int f_map[] = {15, 17, 18, 19, 20, 21, 23, 24}; + for (int i = 0; i < 8; i++) { + if (f_map[i] == num) { + key->data.function_num = i + 5; + return; + } } } - } else { - - switch (seq[1]) { - case 'A': - return ARROW_UP; - case 'B': - return ARROW_DOWN; - case 'C': - return ARROW_RIGHT; - case 'D': - return ARROW_LEFT; - case 'H': - return BEG_LINE; - case 'F': - return END_LINE; - } - } - } else if (seq[0] == 'O') { - switch (seq[1]) { - case 'H': - return BEG_LINE; - case 'F': - return END_LINE; } } - return '\x1b'; - } else { - return c; + + // SS3 sequences (ESC O ...) + if (len == 3 && seq[1] == 'O') { + switch (seq[2]) { + case 'P': + case 'Q': + case 'R': + case 'S': + key->type = KEY_FUNCTION; + key->data.function_num = seq[2] - 'P' + 1; + return; + case 'H': + case 'F': + key->type = KEY_NAVIGATION; + key->data.special = seq[2]; + return; + } + } } + + // UTF-8 character + if (seq[0] >= 32 || (seq[0] & 0x80)) { + int char_len = utf8_char_length(seq[0]); + fprintf(stderr, "char length : %d\n", char_len); + if (char_len <= len) { + key->type = KEY_CHAR; + memcpy(key->c.c, seq, len); + key->c.len = len; + return; + } + } +} + +KeyInfo *editorReadKey() { + fd_set fds; + int timeout_ms = 10; + struct timeval tv; + int total = 0; + KeyInfo *key = (KeyInfo *)malloc(sizeof(KeyInfo)); + int len; + unsigned char buffer[20]; + + if (read(STDIN_FILENO, &buffer[0], 1) <= 0) + return 0; + + while (total < 20) { + FD_ZERO(&fds); + FD_SET(STDIN_FILENO, &fds); + tv.tv_sec = 0; + tv.tv_usec = timeout_ms * 1000; + + int ret = select(STDIN_FILENO + 1, &fds, NULL, NULL, &tv); + if (ret <= 0) + break; + + if (read(STDIN_FILENO, &buffer[total], 1) <= 0) + break; + total++; + } + total++; + + parse_key(buffer, total, key); + + // DEBUG + + fprintf(stderr, "%s %d %d %s %d\n", buffer, buffer[0], buffer[1], key->c.c, key->c.len); + + return key; } int getCursorPosition(int *rows, int *cols) { From 8e1b4d2f8652020ca7079897b195ccd31e4da2e9 Mon Sep 17 00:00:00 2001 From: arthur barraux Date: Sun, 3 May 2026 23:32:40 +0200 Subject: [PATCH 2/2] utf8 processing without struct --- include/append_buffer.h | 2 +- include/builtins.h | 2 - include/data.h | 37 +---- include/define.h | 13 +- include/editor_op.h | 4 +- include/file_io.h | 3 - include/input.h | 6 +- include/row_op.h | 18 +-- include/terminal.h | 6 +- include/utf8.h | 16 ++ install.sh | 0 main.c | 3 - meson.build | 1 + src/append_buffer.c | 6 +- src/builtins.c | 69 +++------ src/editor_op.c | 114 ++++++--------- src/file_io.c | 186 +++++++++--------------- src/init.c | 3 +- src/input.c | 314 +++++++++++++++++----------------------- src/output.c | 87 ++++++----- src/row_op.c | 227 ++++++----------------------- src/terminal.c | 278 +++++++++++------------------------ src/utf8.c | 148 +++++++++++++++++++ 23 files changed, 637 insertions(+), 906 deletions(-) create mode 100644 include/utf8.h mode change 100755 => 100644 install.sh create mode 100644 src/utf8.c diff --git a/include/append_buffer.h b/include/append_buffer.h index dc266e4..3f28992 100644 --- a/include/append_buffer.h +++ b/include/append_buffer.h @@ -5,7 +5,7 @@ #include #include -void abAppend(struct abuf *ab, const unsigned char *s, int len); +void abAppend(struct abuf *ab, const char *s, int len); void abFree(struct abuf *ab); diff --git a/include/builtins.h b/include/builtins.h index 4c5b3ff..ce946d0 100644 --- a/include/builtins.h +++ b/include/builtins.h @@ -31,8 +31,6 @@ Lisp editorPrintC(Lisp args, LispError *e, LispContext ctx); Lisp addPackage(Lisp args, LispError *e, LispContext ctx); -Lisp editorDelRow_L(Lisp args, LispError *e, LispContext ctx); - Lisp editorFind_L(Lisp args, LispError *e, LispContext ctx); Lisp editorReadChar_L(Lisp args, LispError *e, LispContext ctx); diff --git a/include/data.h b/include/data.h index 68b236a..01d1617 100644 --- a/include/data.h +++ b/include/data.h @@ -8,23 +8,17 @@ #include "lisp.h" -typedef struct{ - unsigned char c[4]; - char len; -} utf_8_char_t; - /** - * \struct erow + * \struct row_t * \brief Store one editor row * \param * */ -typedef struct erow { +typedef struct row { int size; /**< Size of the line */ - int rsize; /**< Size of the render line */ - utf_8_char_t *chars; /**< Characters of the line */ - utf_8_char_t *render; /**< The actual line we will print */ -} erow; + int cap; /**< Size of the render line */ + char *chars; /**< Characters of the line */ +} row_t; enum editorStatus_e { IDLE, @@ -57,24 +51,9 @@ typedef enum { MOD_CTRL = 4 } KeyModifier; -// Key information structure -typedef struct { - KeyType type; - int modifiers; // Bitmask of KeyModifier - union { - unsigned int codepoint; // For KEY_CHAR - char ctrl_char; // For KEY_CTRL (A-Z) - char alt_char; // For KEY_ALT - char arrow; // For KEY_ARROW (U/D/L/R) - int function_num; // For KEY_FUNCTION (1-12) - char special; // For KEY_SPECIAL and KEY_NAVIGATION - } data; - utf_8_char_t c; // Raw bytes -} KeyInfo; - struct keyBind_t { - KeyInfo *key_sequence; + char *key_sequence; Lisp command; }; @@ -90,7 +69,7 @@ struct editorConfig { int screenrows; /**< Terminal height*/ int screencols; /**< Terminal width*/ int numrows; /**< Number of rows contained */ - erow *row; /**< Store all the rows printed */ + row_t *rows; /**< Store all the rows printed */ int dirty; char *filename; enum editorStatus_e state; @@ -118,7 +97,7 @@ struct editorConfig { * */ struct abuf { - unsigned char *b; /**< Text that will be printed */ + char *b; /**< Text that will be printed */ int len; /**< Length of the text */ }; diff --git a/include/define.h b/include/define.h index 17c0ec9..f3f0b18 100644 --- a/include/define.h +++ b/include/define.h @@ -11,7 +11,18 @@ #define TAB "\x09" #define SPACE "\x20" - +enum editorKey_e { + BACKSPACE = 127, + ARROW_LEFT = 1000, + ARROW_RIGHT, + ARROW_UP, + ARROW_DOWN, + DEL_KEY, + BEG_LINE, + END_LINE, + PAGE_UP, + PAGE_DOWN, + }; #define ABUF_INIT {NULL, 0} diff --git a/include/editor_op.h b/include/editor_op.h index a67c7df..104387e 100644 --- a/include/editor_op.h +++ b/include/editor_op.h @@ -2,9 +2,7 @@ #define EDITOR_OP_H_ #include "data.h" -void editorInsertChar(utf_8_char_t *c); - -void editorInsertNewLine(void); +void editorInsertChar(int c); void editorDelChar(); diff --git a/include/file_io.h b/include/file_io.h index f8ee453..7dc9902 100644 --- a/include/file_io.h +++ b/include/file_io.h @@ -8,9 +8,6 @@ #include #include -char *editorRowsToString(int *buffer_len); - - void editorCloseFile(void); void editorOpen(char *filename); diff --git a/include/input.h b/include/input.h index 8799904..29d35c6 100644 --- a/include/input.h +++ b/include/input.h @@ -22,11 +22,9 @@ char *editorPrompt(char *prompt, char * PlaceHolder, char bPathMode); -char *key_to_string(int key); +void editorMoveCursor(int key); -void editorMoveCursor(KeyInfo * key); - -int executeKeyBind(KeyInfo *key_sequence); +int executeKeyBind(char *key_sequence); /** * \fn void editorProcessKeypress() diff --git a/include/row_op.h b/include/row_op.h index fe350b9..c68f1f7 100644 --- a/include/row_op.h +++ b/include/row_op.h @@ -8,22 +8,16 @@ #include #include -int editorRowCxToRx(erow *row, int cursor_x); +void editorInsertRow(int at, char *s, int len); -int editorRowRxToCx(erow *row, int rx); +void editorFreeRow(row_t *row); -void editorUpdateRow(erow *row); +int editorRowCxToByte(const row_t *row, int cursor_x); -void editorInsertRow(int at, char *s, size_t len); +int editorRowCharCount(row_t *row); -void editorFreeRow(erow *row); +void editorRowInsertBytes(row_t *row, int at, const char *src, int len); -void editorDelRow(int at); - -void editorRowInsertChar(erow *row, int at, utf_8_char_t c); - -void editorRowAppendString(erow *row, char *s, size_t len); - -void editorRowDelchar(erow *row, int at); +void editorRowDelByte(row_t *row, int at, int n); #endif // ROW_OP_H_ diff --git a/include/terminal.h b/include/terminal.h index d61a569..416b7fa 100644 --- a/include/terminal.h +++ b/include/terminal.h @@ -25,12 +25,12 @@ void disableRawMode(); void enableRawMode(); -KeyInfo * editorReadKey(); +int editorReadKey(); int getCursorPosition(int *rows, int *cols); -KeyInfo *stringToCodepoint(const char *string); - int getWindowSize(int *rows, int *cols); +char *key_to_string(int key); + #endif diff --git a/include/utf8.h b/include/utf8.h new file mode 100644 index 0000000..2c3f425 --- /dev/null +++ b/include/utf8.h @@ -0,0 +1,16 @@ +// +// Created by Giorgio on 01/05/2026. +// + +#ifndef BELUGA_UTF8_H +#define BELUGA_UTF8_H +#include + +uint32_t readUtf8Char(void); +int utf8Encode(uint32_t cp, char *buf); +int utf8Seqlen(unsigned char c); +int codepointWidth(uint32_t codepoint); +uint32_t utf8Decode(const char** s); + + +#endif //BELUGA_UTF8_H diff --git a/install.sh b/install.sh old mode 100755 new mode 100644 diff --git a/main.c b/main.c index 2cd2716..87451df 100644 --- a/main.c +++ b/main.c @@ -30,15 +30,12 @@ int main(int argc, char *argv[]) { char * splash_screen = (char *) calloc(256, sizeof(char)); - // Set support for utf-8 - setlocale(LC_ALL, ""); // INIT enableRawMode(); initEditor(); if (argc >= 2) { - E.state = READ_AND_WRITE; editorOpen(argv[1]); } else { strcat(splash_screen, getenv("HOME")); diff --git a/meson.build b/meson.build index 37ebd43..1a62b17 100644 --- a/meson.build +++ b/meson.build @@ -20,6 +20,7 @@ src_files = files( 'src/row_op.c', 'src/terminal.c', 'src/builtins.c', + 'src/utf8.c' ) # Executable diff --git a/src/append_buffer.c b/src/append_buffer.c index 6afb831..38cdc05 100644 --- a/src/append_buffer.c +++ b/src/append_buffer.c @@ -1,9 +1,7 @@ #include "../include/append_buffer.h" -extern struct editorConfig E; - -void abAppend(struct abuf *ab, const unsigned char *s, int len) { - unsigned char *new = realloc(ab->b, ab->len + len); +void abAppend(struct abuf *ab, const char *s, int len) { + char *new = realloc(ab->b, ab->len + len); if (new == NULL) { return; diff --git a/src/builtins.c b/src/builtins.c index b580197..28e69ad 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -5,31 +5,22 @@ #include "../include/editor_op.h" #include "../include/row_op.h" #include "../include/data.h" -#include "../include/terminal.h" #include #include #include -utf_8_char_t make_utf8_char(const char *bytes, int len) { - utf_8_char_t ch; - ch.len = len; - memcpy(ch.c, bytes, len); - return ch; -} - Lisp mapKey(Lisp args, LispError *e, LispContext ctx) { - const char *key_string = lisp_string(lisp_car(args)); - KeyInfo *key = stringToCodepoint(key_string); + const char *key_sequence = lisp_string(lisp_car(args)); args = lisp_cdr(args); // second argument Lisp func = lisp_car(args); E.key_binds = (struct keyBind_t *)realloc(E.key_binds, ++E.number_of_keybinds * sizeof(struct keyBind_t)); - E.key_binds[E.number_of_keybinds - 1].key_sequence = (KeyInfo *) malloc(sizeof(KeyInfo)); + E.key_binds[E.number_of_keybinds - 1].key_sequence = (char *) malloc(50 * sizeof(char)); - memcpy(E.key_binds[E.number_of_keybinds - 1].key_sequence, key, sizeof(KeyInfo)); + strncpy(E.key_binds[E.number_of_keybinds - 1].key_sequence, key_sequence, 50); E.key_binds[E.number_of_keybinds - 1].command = func; @@ -38,30 +29,26 @@ Lisp mapKey(Lisp args, LispError *e, LispContext ctx) { Lisp moveCursor(Lisp args, LispError *e, LispContext ctx) { const char *direction = lisp_string(lisp_car(args)); - KeyInfo key; - key.type = KEY_ARROW; switch (direction[0]) { case 'u': - key.data.arrow = 'A'; + editorMoveCursor(ARROW_UP); break; case 'd': - key.data.arrow = 'B'; + editorMoveCursor(ARROW_DOWN); break; case 'r': - key.data.arrow = 'C'; + editorMoveCursor(ARROW_RIGHT); break; case 'l': - key.data.arrow = 'D'; + editorMoveCursor(ARROW_LEFT); break; } - editorMoveCursor(&key); return lisp_null(); } Lisp editorQuit(Lisp args, LispError* e, LispContext ctx) { - fprintf(stderr, "quit\n"); if (E.dirty && E.quit_times_buffer > 0) { editorSetStatusMessage("WARNING! Changes hasn't been saved. Press Ctrl-Q " "another time to quit."); @@ -75,26 +62,27 @@ Lisp editorQuit(Lisp args, LispError* e, LispContext ctx) { return lisp_null(); - + } Lisp l_editorSave(Lisp args, LispError* e, LispContext ctx) { editorSave(); - + return lisp_null(); - + } Lisp l_editorInsertNewLine(Lisp args, LispError* e, LispContext ctx) { - // editorInsertNewLine(); - + editorInsertRow(E.numrows,"", 0); + editorMoveCursor(ARROW_DOWN); + return lisp_null(); - + } Lisp moveCursorBeginLine(Lisp args, LispError *e, LispContext ctx) { @@ -104,7 +92,7 @@ Lisp moveCursorBeginLine(Lisp args, LispError *e, LispContext ctx) { Lisp moveCursorEndLine(Lisp args, LispError* e, LispContext ctx) { if (E.cursor_y < E.numrows) { - E.cursor_x = E.row[E.cursor_y].size; + E.cursor_x = E.rows[E.cursor_y].size; } return lisp_null(); } @@ -118,11 +106,8 @@ Lisp deletePreviousChar(Lisp args, LispError* e, LispContext ctx) { Lisp editorMoveCursorPageUp(Lisp args, LispError* e, LispContext ctx) { E.cursor_y = E.row_offset; int times = E.screenrows; - KeyInfo key; - key.type = KEY_ARROW; - key.data.arrow = 'D'; while (--times) { - editorMoveCursor(&key); + editorMoveCursor(ARROW_UP); } return lisp_null(); } @@ -133,11 +118,8 @@ Lisp editorMoveCursorPageDown(Lisp args, LispError* e, LispContext ctx) { E.cursor_y = E.numrows; } int times = E.screenrows; - KeyInfo key; - key.type = KEY_ARROW; - key.data.arrow = 'D'; while (--times) { - editorMoveCursor(&key); + editorMoveCursor(ARROW_DOWN); } return lisp_null(); @@ -153,9 +135,8 @@ Lisp editorOpenFile(Lisp args, LispError *e, LispContext ctx) { Lisp editorPrintC(Lisp args, LispError *e, LispContext ctx) { - char *c = lisp_string(lisp_car(args)); - utf_8_char_t ch = make_utf8_char(c, 1); - editorInsertChar(&ch); + char c = lisp_string(lisp_car(args))[0]; + editorInsertChar(c); return lisp_null(); } @@ -172,14 +153,9 @@ Lisp addPackage(Lisp args, LispError *e, LispContext ctx) { E.ctx); fclose(fd_package); free(package_dir); - - return lisp_null(); - -} -Lisp editorDelRow_L(Lisp args, LispError *e, LispContext ctx) { - editorDelRow(E.cursor_y); return lisp_null(); + } Lisp editorFind_L(Lisp args, LispError *e, LispContext ctx) { @@ -188,8 +164,7 @@ Lisp editorFind_L(Lisp args, LispError *e, LispContext ctx) { } Lisp editorReadChar_L(Lisp args, LispError *e, LispContext ctx) { - // fprintf(stderr, "char read : %c\n", E.row[E.cursor_y].render[E.cursor_x]); - // return lisp_make_char(E.row[E.cursor_y].render[E.cursor_x]); - return lisp_null(); + fprintf(stderr, "char read : %c\n", E.rows[E.cursor_y].chars[E.cursor_x]); + return lisp_make_char(E.rows[E.cursor_y].chars[E.cursor_x]); } diff --git a/src/editor_op.c b/src/editor_op.c index cbc5921..a7012b8 100644 --- a/src/editor_op.c +++ b/src/editor_op.c @@ -1,94 +1,72 @@ #include "../include/editor_op.h" #include "../include/row_op.h" -#include "include/data.h" +#include "../include/data.h" #include +#include "../include/utf8.h" + extern struct editorConfig E; -void editorInsertChar(utf_8_char_t *c) { +void editorInsertChar(int c) { if (E.state == READ_ONLY) return; - fprintf(stderr, "Insert char %s %d\n", c->c, c->len); - // If cursor is past end of file, add empty rows - if (E.cursor_y == E.numrows) { + if (E.cursor_y == E.numrows) editorInsertRow(E.numrows, "", 0); + row_t *row = &E.rows[E.cursor_y]; + int byte = editorRowCxToByte(row, E.cursor_x); + char buf[4]; + int n; + if (c < 0x80) { + buf[0] = c; + n = 1; + } else { + n = utf8Encode((uint32_t)c, buf); } - - // Insert character at cursor position - editorRowInsertChar(&E.row[E.cursor_y], E.cursor_x, *c); + editorRowInsertBytes(row, byte, buf, n); E.cursor_x++; + E.dirty = 1; } -void editorInsertNewline(void) { +void editorInsertNewline(const char* s, int len) { if (E.state == READ_ONLY) return; - if (E.cursor_x == 0) { - // Insert blank line before current line - editorInsertRow(E.cursor_y, "", 0); - } else { - // Split current line at cursor - erow *row = &E.row[E.cursor_y]; - - // Calculate byte length of remaining part - int remaining_chars = row->size - E.cursor_x; - - // Allocate buffer for remaining characters - char *buf = malloc(remaining_chars * 4); // Max 4 bytes per UTF-8 char - int buf_len = 0; - - // Convert utf_8_char_t to bytes - for (int i = E.cursor_x; i < row->size; i++) { - for (int j = 0; j < row->chars[i].len; j++) { - buf[buf_len++] = row->chars[i].c[j]; - } - } - - // Insert new row with remaining text - editorInsertRow(E.cursor_y + 1, buf, buf_len); - free(buf); - - // Truncate current row at cursor - row = &E.row[E.cursor_y]; // Refresh pointer after realloc - row->size = E.cursor_x; - editorUpdateRow(row); - } - - E.cursor_y++; - E.cursor_x = 0; + E.rows = realloc(E.rows, sizeof(row_t) * (E.numrows + 1)); + row_t *r = &E.rows[E.numrows]; + r->cap = len + 1; + r->chars = malloc(r->cap); + memcpy(r->chars, s, len); + r->size = len; + r->chars[len] = '\0'; + E.numrows++; } -void editorRowAppendRow(erow *dest, erow *src) { - // Allocate space for combined rows - utf_8_char_t *new_chars = realloc(dest->chars, - sizeof(utf_8_char_t) * (dest->size + src->size)); - if (!new_chars) return; - - dest->chars = new_chars; - - // Copy source row characters - memcpy(&dest->chars[dest->size], src->chars, sizeof(utf_8_char_t) * src->size); - dest->size += src->size; - - editorUpdateRow(dest); - ++E.dirty; -} - -void editorDelChar(void) { +void editorDelChar(void) +{ if (E.state == READ_ONLY) return; if (E.cursor_y == E.numrows) return; if (E.cursor_x == 0 && E.cursor_y == 0) return; - - erow *row = &E.row[E.cursor_y]; - + + row_t *r = &E.rows[E.cursor_y]; if (E.cursor_x > 0) { - // Delete character before cursor - editorRowDelchar(row, E.cursor_x - 1); + /* find byte of previous char */ + int byte = editorRowCxToByte(r, E.cursor_x); + /* step back one character */ + int start = byte; + /* walk from beginning to find start of char at cx-1 */ + start = editorRowCxToByte(r, E.cursor_x - 1); + editorRowDelByte(r, start, byte - start); E.cursor_x--; + E.dirty = 1; } else { - // At beginning of line - join with previous line - E.cursor_x = E.row[E.cursor_y - 1].size; - editorRowAppendRow(&E.row[E.cursor_y - 1], row); - editorDelRow(E.cursor_y); + /* merge with previous row */ + row_t *prev = &E.rows[E.cursor_y - 1]; + E.cursor_x = editorRowCharCount(prev); + editorRowInsertBytes(prev, prev->size, r->chars, r->size); + free(r->chars); + memmove(&E.rows[E.cursor_y], &E.rows[E.cursor_y + 1], + sizeof(row_t) * (E.numrows - E.cursor_y - 1)); + E.numrows--; E.cursor_y--; + E.dirty = 1; } } diff --git a/src/file_io.c b/src/file_io.c index 811d01a..a35233d 100644 --- a/src/file_io.c +++ b/src/file_io.c @@ -9,60 +9,25 @@ #include #include -extern char *strdup(const char *); -extern ssize_t getline(char **restrict lineptr, size_t *restrict n, - FILE *restrict stream); -extern int ftruncate(int fd, off_t length); extern struct editorConfig E; -// Convert utf_8_char_t array to byte string -char *editorRowsToString(int *buffer_len) { - int tot_len = 0; - int j, i; - char *buf; - char *p; - // Calculate total byte length (not character count) - for (j = 0; j < E.numrows; ++j) { - // Count actual bytes in each character - for (i = 0; i < E.row[j].size; i++) { - tot_len += E.row[j].chars[i].len; - } - tot_len++; // For newline - } - - *buffer_len = tot_len; - buf = malloc(tot_len); - if (!buf) return NULL; - - p = buf; - for (j = 0; j < E.numrows; ++j) { - // Copy each character's bytes - for (i = 0; i < E.row[j].size; i++) { - for (int k = 0; k < E.row[j].chars[i].len; k++) { - *p++ = E.row[j].chars[i].c[k]; - } - } - *p++ = '\n'; - } - - return buf; -} - -void editorCloseFile(void) { +void editorCloseFile(void) +{ // Free all rows - for (int i = 0; i < E.numrows; i++) { - editorFreeRow(&E.row[i]); + for (int i = 0; i < E.numrows; i++) + { + editorFreeRow(&E.rows[i]); } - + E.cursor_x = 0; E.cursor_y = 0; E.rx = 0; E.row_offset = 0; E.col_offset = 0; E.numrows = 0; - free(E.row); - E.row = NULL; + free(E.rows); + E.rows = NULL; E.dirty = 0; free(E.filename); E.filename = NULL; @@ -70,11 +35,13 @@ void editorCloseFile(void) { E.status_msg_time = 0; } -void editorOpen(char *filename) { - FILE *fp; +void editorOpen(char* filename) +{ + FILE* fp; // Test if a file is already open - if (E.filename != NULL) { + if (E.filename != NULL) + { editorCloseFile(); } E.state = READ_AND_WRITE; @@ -82,136 +49,117 @@ void editorOpen(char *filename) { E.filename = strdup(filename); fp = fopen(filename, "r"); - if (!fp) { + fprintf(stderr, "reading file %s\n", filename); + if (!fp) + { // File doesn't exist - that's okay, we'll create it on save E.dirty = 0; return; } - char *line = NULL; - size_t line_cap = 0; - ssize_t line_len; + char* line = NULL; + size_t line_len; - while ((line_len = getline(&line, &line_cap, fp)) != -1) { + while ((line_len = getline(&line, &line_len, fp)) != -1) + { + fprintf(stderr, "%s %d", line, line_len); // Strip newline characters while (line_len > 0 && - (line[line_len - 1] == '\n' || line[line_len - 1] == '\r')) { + (line[line_len - 1] == '\n' || line[line_len - 1] == '\r')) + { --line_len; } + fprintf(stderr, "len %d\n", line_len); // editorInsertRow will convert bytes to utf_8_char_t - editorInsertRow(E.numrows, line, line_len); + fprintf(stderr, "row number : %d\n", E.numrows); + editorInsertRow(E.numrows, line, (int) line_len); } free(line); fclose(fp); E.dirty = 0; } -void editorSave() { +void editorSave() +{ int len; - char *buf; + char* buf; int fd; - - if (E.filename == NULL) { + + if (E.filename == NULL) + { E.filename = editorPrompt("Save as: %s (ESC to cancel)", "", 1); - if (E.filename == NULL) { + if (E.filename == NULL) + { editorSetStatusMessage("Save aborted"); return; } } - - buf = editorRowsToString(&len); - if (!buf) { - editorSetStatusMessage("Can't save! Memory error"); - return; - } - + fd = open(E.filename, O_RDWR | O_CREAT | O_TRUNC, 0644); - if (fd != -1) { - if (write(fd, buf, len) == len) { - close(fd); - free(buf); - E.dirty = 0; - editorSetStatusMessage("%d bytes written to disk", len); - return; - } - close(fd); + for (int i = 0; i < E.numrows; i++) + { + write(fd, E.rows[i].chars, E.rows[i].size); + write(fd, "\n", 1); + // fputc('\n', fp); } - - free(buf); - editorSetStatusMessage("Can't save! I/O error: %s", strerror(errno)); + close(fd); + + E.dirty = 0; + editorSetStatusMessage("%d bytes written to disk", len); } -// Helper to convert utf_8_char_t array to byte string for searching -static char *row_to_string(erow *row) { - // Calculate byte length - int byte_len = 0; - for (int i = 0; i < row->rsize; i++) { - byte_len += row->render[i].len; - } - - char *str = malloc(byte_len + 1); - if (!str) return NULL; - - // Convert to bytes - int pos = 0; - for (int i = 0; i < row->rsize; i++) { - for (int j = 0; j < row->render[i].len; j++) { - str[pos++] = row->render[i].c[j]; - } - } - str[pos] = '\0'; - - return str; -} - -void editorFind() { - char *query = editorPrompt("Search: %s (ESC to cancel)", "", 0); +void editorFind() +{ + char* query = editorPrompt("Search: %s (ESC to cancel)", "", 0); if (query == NULL) return; - + int saved_cursor_x = E.cursor_x; int saved_cursor_y = E.cursor_y; int saved_row_offset = E.row_offset; int saved_col_offset = E.col_offset; - +#if 0 // Search from current position forward - for (int i = E.cursor_y; i < E.numrows; i++) { - erow *row = &E.row[i]; - + for (int i = E.cursor_y; i < E.numrows; i++) + { + row_t* row = &E.rows[i]; + // Convert row to byte string for searching - char *render_str = row_to_string(row); + char* render_str = row_to_string(row); if (!render_str) continue; - - char *match = strstr(render_str, query); - if (match) { + + char* match = strstr(render_str, query); + if (match) + { E.cursor_y = i; - + // Find the character index from byte position int byte_pos = match - render_str; int char_idx = 0; int current_byte = 0; - - for (char_idx = 0; char_idx < row->rsize; char_idx++) { + + for (char_idx = 0; char_idx < row->rsize; char_idx++) + { if (current_byte >= byte_pos) break; current_byte += row->render[char_idx].len; } - + E.cursor_x = editorRowRxToCx(row, char_idx); E.row_offset = E.numrows; // Force scroll - + free(render_str); free(query); return; } - + free(render_str); } - + // Not found - restore cursor position E.cursor_x = saved_cursor_x; E.cursor_y = saved_cursor_y; E.row_offset = saved_row_offset; E.col_offset = saved_col_offset; - +#endif editorSetStatusMessage("Not found: %s", query); free(query); } diff --git a/src/init.c b/src/init.c index a0e5ac0..acb18a2 100644 --- a/src/init.c +++ b/src/init.c @@ -32,7 +32,6 @@ void initBuiltins() { registerBuiltin("EDITOR-OPEN-FILE", editorOpenFile); registerBuiltin("EDITOR-INSERT-CHAR", editorPrintC); registerBuiltin("ADD-PACKAGE", addPackage); - registerBuiltin("EDITOR-DEL-ROW", editorDelRow_L); registerBuiltin("EDITOR-FIND", editorFind_L); registerBuiltin("EDITOR-READ-CHAR", editorReadChar_L); } @@ -45,7 +44,7 @@ void initEditor() { E.row_offset = 0; E.col_offset = 0; E.numrows = 0; - E.row = NULL; + E.rows = NULL; E.dirty = 0; E.filename = NULL; E.state = READ_ONLY; diff --git a/src/input.c b/src/input.c index 0632fff..a85f273 100644 --- a/src/input.c +++ b/src/input.c @@ -13,236 +13,182 @@ #include extern struct editorConfig E; - -char *file_completion(const char *path) { - DIR *dir; - struct dirent *entry; - char directory[128]; - char predict[128]; - int predict_len = 0; +char * file_completion(const char *path) { + DIR * dir; + struct dirent *entry; + char directory[128]; + char predict[128]; + int predict_len = 0; if (path[strlen(path) - 1] == '/') { - return strdup(path); + return path; } - // Find dir name - char *last_slash = strrchr(path, '/'); - if (last_slash) { - size_t dir_len = last_slash - path + 1; - strncpy(directory, path, dir_len); - predict_len = strlen(path) - dir_len; - strncpy(predict, last_slash + 1, predict_len); - directory[dir_len] = '\0'; - predict[predict_len] = '\0'; - } else { - return NULL; - } + // Find dir name + char * last_slash = strrchr(path, '/'); + if (last_slash) { + size_t dir_len = last_slash - path + 1; // length of dir_path + strncpy(directory, path, dir_len); + predict_len = strlen(path) - dir_len - 1; + strncpy(predict, last_slash + 1, predict_len); + directory[dir_len] = '\0'; + predict[predict_len] = '\0'; + fprintf(stderr, "%s %s\n", directory, predict); + } else { + return NULL; + } + dir = opendir(directory); if (!dir) + return NULL; + + while ((entry = readdir(dir)) != NULL) { + if (strncmp(entry->d_name, predict, predict_len) == 0) { + static char full_path[128]; + snprintf(full_path, sizeof(full_path), "%s%s", directory, entry->d_name); + + struct stat st; + if (stat(full_path, &st) == 0 && S_ISDIR(st.st_mode)) { + strcat(full_path, "/"); // add slash for directories + } + + return strdup(full_path); + } + } + + // Cleanup when no more entries + closedir(dir); + dir = NULL; return NULL; - while ((entry = readdir(dir)) != NULL) { - if (strncmp(entry->d_name, predict, predict_len) == 0) { - static char full_path[128]; - snprintf(full_path, sizeof(full_path), "%s%s", directory, entry->d_name); - - struct stat st; - if (stat(full_path, &st) == 0 && S_ISDIR(st.st_mode)) { - strcat(full_path, "/"); - } - closedir(dir); - return strdup(full_path); - } - } - - closedir(dir); - return NULL; } /** * \fn char * editorPrompt(struct editorConfig *E, char *prompt, char bPathMode) * \brief Return user input in a prompt when enter is hit. */ -char *editorPrompt(char *prompt, char *placeHolder, char bPathMode) { +char *editorPrompt(char *prompt, char * placeHolder, char bPathMode) { size_t buf_size = 128; char *buf = malloc(buf_size); size_t buf_len = 0; + int c = 0; buf[0] = '\0'; - strcpy(buf, placeHolder); - buf_len = strlen(placeHolder); + strcpy(buf, placeHolder); + buf_len = strlen(placeHolder); while (1) { - editorSetStatusMessage(prompt, buf); + editorSetStatusMessage(prompt, buf); editorRefreshScreen(); - - KeyInfo *key = editorReadKey(); - - // Handle backspace/delete - if (key->type == KEY_SPECIAL && (key->data.special == 127 || key->data.special == 8)) { + c = editorReadKey(); + if (c == DEL_KEY || c == CTRL_KEY('h') || c == BACKSPACE) { if (buf_len != 0) { buf[--buf_len] = '\0'; } - } - // Handle Ctrl+H (backspace) - else if (key->type == KEY_CTRL && key->data.ctrl_char == 'H') { - if (buf_len != 0) { - buf[--buf_len] = '\0'; - } - } - // Handle ESC - else if (key->type == KEY_SPECIAL && key->data.special == 27) { + } else if (c == ESCAPE) { editorSetStatusMessage(""); free(buf); return NULL; - } - // Handle Enter - else if (key->type == KEY_SPECIAL && (key->data.special == 13 || key->data.special == 10)) { + } else if (c == '\r') { if (buf_len != 0) { editorSetStatusMessage(""); return buf; } - } - // Handle Tab for path completion - else if (bPathMode && key->type == KEY_SPECIAL && key->data.special == 9) { - char path[128]; - char *pwd; - if (buf[0] != '/') { - pwd = getenv("PWD"); - snprintf(path, sizeof(path), "%s/%s", pwd, buf); - } else { - strcpy(path, buf); - } - - char *completion = file_completion(path); - if (completion) { - memset(buf, 0, buf_size); - strcpy(buf, completion); - buf_len = strlen(buf); - free(completion); - } - } - // Handle regular characters (ASCII only for prompts) - else if (key->type == KEY_CHAR && key->data.codepoint < 128) { + } else if (bPathMode && c == '\t') { + char path[128]; + char * pwd; + if (buf[0] != '/') { + pwd = getenv("PWD"); + fprintf(stderr, "%s\n", pwd); + memcpy(path, pwd, strlen(pwd)); + path[strlen(pwd)] = '/'; + strncat(path, buf, buf_len); + } else { + strcpy(path, buf); + } + memset(buf, 0, 128); + buf_len = 0; + strcpy(buf, file_completion(path)); + buf_len = strlen(buf); + buf[buf_len] = '\0'; + + } else if (!iscntrl(c) && c < 128) { if (buf_len == buf_size - 1) { buf_size *= 2; buf = realloc(buf, buf_size); } - buf[buf_len++] = (char)key->data.codepoint; + buf[buf_len++] = c; buf[buf_len] = '\0'; } } } -void editorMoveCursor(KeyInfo *key) { - if (key->type != KEY_ARROW) return; - - erow *row = (E.cursor_y >= E.numrows) ? NULL : &E.row[E.cursor_y]; - int row_len; - - switch (key->data.arrow) { - case 'C': // Right - if (row && E.cursor_x < row->size) { - ++E.cursor_x; - } else if (row && E.cursor_x == row->size) { - E.cursor_y++; - E.cursor_x = 0; - } - break; - case 'B': // Down - if (E.cursor_y < E.numrows) { - ++E.cursor_y; - } - break; - case 'A': // Up - if (E.cursor_y != 0) { - --E.cursor_y; - } - break; - case 'D': // Left - if (E.cursor_x != 0) { - --E.cursor_x; - } else if (E.cursor_y > 0) { - --E.cursor_y; - E.cursor_x = E.row[E.cursor_y].size; - } - break; - } +void editorMoveCursor(int key) +{ + row_t *row = (E.cursor_y >= E.numrows) ? NULL : &E.rows[E.cursor_y]; + int row_len; + switch (key) { + case ARROW_RIGHT: + if (row && E.cursor_x < row->size) { + ++E.cursor_x; + } else if (row && E.cursor_x == row->size) { + E.cursor_y++; + E.cursor_x = 0; + } + break; + case ARROW_DOWN: + if (E.cursor_y < E.numrows) { + ++E.cursor_y; + } + break; + case ARROW_UP: + if (E.cursor_y != 0) { + --E.cursor_y; + } + break; + case ARROW_LEFT: + if (E.cursor_x != 0) { + --E.cursor_x; + } else if (E.cursor_y > 0) { + --E.cursor_y; + E.cursor_x = E.rows[E.cursor_y].size; + } + break; + } - row = (E.cursor_y >= E.numrows) ? NULL : &E.row[E.cursor_y]; - row_len = row ? row->size : 0; - if (E.cursor_x > row_len) { - E.cursor_x = row_len; - } + row = (E.cursor_y >= E.numrows) ? NULL : &E.rows[E.cursor_y]; + row_len = row ? row->size : 0; + if (E.cursor_x > row_len) { + E.cursor_x = row_len; + } } -KeyInfo *stringToCodepoint(const char *string) { - KeyInfo *key = (KeyInfo *)malloc(sizeof(KeyInfo)); - // test control key - if (!strncmp("CTRL", string, 4)) { - key->type = KEY_CTRL; - key->data.ctrl_char = toupper(string[6]) + 64; - } else if (!strncmp("ARROW", string, 5)) { - key->type = KEY_ARROW; - if (!strcmp("UP", string + 7)) { - key->data.arrow = 'A'; - } else if (!strcmp("DOWN", string + 7)) { - key->data.arrow = 'B'; - } else if (!strcmp("RIGHT", string + 7)) { - key->data.arrow = 'C'; - } else if (!strcmp("LEFT", string + 7)) { - key->data.arrow = 'D'; - } - } +int executeKeyBind(char *key_sequence) { + int i; + for (i = 0; i < E.number_of_keybinds; ++i) { + if (!strcmp(key_sequence, E.key_binds[i].key_sequence)) { - return key; -} - -static int key_match(KeyInfo *a, KeyInfo *b) { - if (a->type != b->type) return 0; - if (a->modifiers != b->modifiers) return 0; - - switch (a->type) { - case KEY_CTRL: - return toupper(a->data.ctrl_char) == toupper(b->data.ctrl_char); - case KEY_ALT: - return a->data.alt_char == b->data.alt_char; - case KEY_ARROW: - return a->data.arrow == b->data.arrow; - case KEY_FUNCTION: - return a->data.function_num == b->data.function_num; - case KEY_CHAR: - return a->data.codepoint == b->data.codepoint; - case KEY_SPECIAL: - case KEY_NAVIGATION: - return a->data.special == b->data.special; - default: - return 0; - } -} - -int executeKeyBind(KeyInfo *key_sequence) { - for (int i = 0; i < E.number_of_keybinds; ++i) { - fprintf(stderr, "Keybind found\n"); - if (key_match(key_sequence, E.key_binds[i].key_sequence)) { - // Execute the lisp command - lisp_eval(lisp_cons(E.key_binds[i].command, lisp_null(), E.ctx), - &E.ctx_error, E.ctx); - return 1; - } - } - return 0; + fprintf(stderr, "lisp function %s\n", key_sequence); + // It's a symbol, create a function call + lisp_eval(lisp_cons(E.key_binds[i].command, lisp_null(), E.ctx), + &E.ctx_error, E.ctx); + return 1; + } + } + return 0; } void editorProcessKeypress() { - KeyInfo *key = editorReadKey(); - if (!key) - return; + int c = editorReadKey(); + char* key_sequence; - if (executeKeyBind(key)) { - fprintf(stderr, "Keybinds found\n"); - return; - } - editorInsertChar(&key->c); - E.quit_times_buffer = E.constantes.QUIT_TIMES; -} + key_sequence = key_to_string(c); + fprintf(stderr, "%s\n", key_sequence); + + if (executeKeyBind(key_to_string(c))) { + return; + } + editorInsertChar(c); + E.quit_times_buffer = E.constantes.QUIT_TIMES; + +} \ No newline at end of file diff --git a/src/output.c b/src/output.c index b337628..b58c6dd 100644 --- a/src/output.c +++ b/src/output.c @@ -4,18 +4,7 @@ #include #include -extern struct editorConfig E; - -static void utf8_to_bytes(utf_8_char_t *chars, int count, unsigned char *output, int *output_len) { - int pos = 0; - for (int i = 0; i < count; i++) { - for (int j = 0; j < chars[i].len; j++) { - output[pos++] = chars[i].c[j]; - } - fprintf(stderr, "bytes length : %s %d\n", chars[i].c, pos); - } - *output_len = pos; -} +#include "include/utf8.h" void editorDrawRows(struct abuf *ab) { int y; @@ -24,6 +13,8 @@ void editorDrawRows(struct abuf *ab) { int padding; int len; int file_row; + row_t *row; + for (y = 0; y < E.screenrows; ++y) { file_row = y + E.row_offset; if (file_row >= E.numrows) { @@ -47,20 +38,22 @@ void editorDrawRows(struct abuf *ab) { abAppend(ab, "~", 1); } } else { - len = E.row[file_row].rsize - E.col_offset; - if (len < 0) - len = 0; - if (len > E.screencols) - len = E.screencols; - if (len > 0) { - unsigned char *display_buf = malloc(len * 4); // Max 4 bytes per char - int byte_len; - - utf8_to_bytes(&E.row[file_row].render[E.col_offset], len, display_buf, - &byte_len); - abAppend(ab, display_buf, byte_len); - fprintf(stderr, "display buffer : %s %d\n", display_buf, byte_len); - free(display_buf); + int rx = 0, i = 0; + int rendered = 0; + row_t *row = &E.rows[E.row_offset + y]; + while (i < row->size && rendered < E.screencols) { + int sl = utf8Seqlen((unsigned char)row->chars[i]); + if (sl < 1) sl = 1; + const char *p = row->chars + i; + uint32_t cp = utf8Decode(&p); + int w = codepointWidth(cp); if (w == 0) w = 1; + if (rx >= E.col_offset) { + if (rendered + w > E.screencols) break; + abAppend(ab, row->chars + i, sl); + rendered += w; + } + rx += w; + i += sl; } } abAppend(ab, ERASE_END_LINE, 3); @@ -68,24 +61,29 @@ void editorDrawRows(struct abuf *ab) { } } -void editorScroll() { - E.rx = E.cursor_x; - if (E.cursor_y < E.numrows) { - E.rx = editorRowCxToRx(&E.row[E.cursor_y], E.cursor_x); +int editorCxToRx(void) { + if (E.cursor_y >= E.numrows) return E.cursor_x; + row_t *row = &E.rows[E.cursor_y]; + int rx = 0, i = 0, col = 0; + while (col < E.cursor_x && i < row->size) { + int sl = utf8Seqlen((unsigned char)row->chars[i]); + if (sl < 1) sl = 1; + const char *p = row->chars + i; + uint32_t cp = utf8Decode(&p); + int w = codepointWidth(cp); + if (w == 0) w = 1; + rx += w; + i += sl; col++; } + return rx; +} - if (E.cursor_y < E.row_offset) { - E.row_offset = E.cursor_y; - } - if (E.cursor_y >= E.row_offset + E.screenrows) { - E.row_offset = E.cursor_y - E.screenrows + 1; - } - if (E.rx < E.col_offset) { - E.col_offset = E.rx; - } - if (E.rx >= E.col_offset + E.screencols) { - E.col_offset = E.rx - E.screencols + 1; - } +void editorScroll() { + E.rx = editorCxToRx(); + if (E.cursor_y < E.row_offset) E.row_offset = E.cursor_y; + if (E.cursor_y >= E.row_offset + E.screenrows) E.row_offset = E.cursor_y - E.screenrows + 1; + if (E.rx < E.col_offset) E.col_offset = E.rx; + if (E.rx >= E.col_offset + E.screencols) E.col_offset = E.rx - E.screencols + 1; } void editorDrawStatusBar(struct abuf *ab) { @@ -130,6 +128,7 @@ void editorRefreshScreen() { editorScroll(); struct abuf ab = ABUF_INIT; char buf[32]; + int len; abAppend(&ab, HIDE_CURSOR, 6); abAppend(&ab, CURSOR_TOP_LEFT, 3); @@ -138,9 +137,9 @@ void editorRefreshScreen() { editorDrawStatusBar(&ab); editorDrawMessageBar(&ab); - snprintf(buf, sizeof(buf), "\x1b[%d;%dH", (E.cursor_y - E.row_offset) + 1, + len = snprintf(buf, sizeof(buf), "\x1b[%d;%dH", (E.cursor_y - E.row_offset) + 1, (E.rx - E.col_offset) + 1); - abAppend(&ab, buf, strlen(buf)); + abAppend(&ab, buf, len); abAppend(&ab, SHOW_CURSOR, 6); diff --git a/src/row_op.c b/src/row_op.c index 1650f68..401e3b5 100644 --- a/src/row_op.c +++ b/src/row_op.c @@ -6,213 +6,65 @@ #include #include -extern struct editorConfig E; +#include "include/utf8.h" -static int is_tab(utf_8_char_t *ch) { - return ch->len == 1 && ch->c[0] == '\t'; -} - -// Helper function to check if two utf_8_char_t are equal -static int utf8_char_equal(utf_8_char_t *a, utf_8_char_t *b) { - if (a->len != b->len) return 0; - return memcmp(a->c, b->c, a->len) == 0; -} - -// Helper function to create a space character -static utf_8_char_t make_space() { - utf_8_char_t space; - space.c[0] = ' '; - space.len = 1; - return space; -} - -int editorRowCxToRx(erow *row, int cursor_x) { - int render_x = 0; - int i; - for (i = 0; i < cursor_x; ++i) { - if (is_tab(&row->chars[i])) { - render_x += (E.constantes.TAB_LENGTH - 1) - (render_x % E.constantes.TAB_LENGTH); - } - render_x++; - } - return render_x; -} - -int editorRowRxToCx(erow *row, int rx) { - int cur_rx = 0; - int cx; - for (cx = 0; cx < row->size; cx++) { - if (is_tab(&row->chars[cx])) - cur_rx += (E.constantes.TAB_LENGTH - 1) - (cur_rx % E.constantes.TAB_LENGTH); - cur_rx++; - if (cur_rx > rx) return cx; - } - return cx; -} - -/** - * \fn editorUpdateRow(erow *row) - * \brief Copy content of \p row in \p row->render. - * */ - -void editorUpdateRow(erow *row) { - int i, i_render; - int tabs = 0; - - // Count number of tabs - for (i = 0; i < row->size; ++i) { - if (is_tab(&row->chars[i])) { - tabs++; - } - } - - free(row->render); - // Allocate space for utf_8_char_t array - row->render = malloc(sizeof(utf_8_char_t) * (row->size + tabs * (E.constantes.TAB_LENGTH - 1))); - - if (!row->render) { - row->rsize = 0; - return; - } - - i_render = 0; - for (i = 0; i < row->size; ++i) { - if (is_tab(&row->chars[i])) { - // Replace tab with spaces - row->render[i_render++] = make_space(); - while (i_render % E.constantes.TAB_LENGTH) { - row->render[i_render++] = make_space(); - } - } else { - row->render[i_render++] = row->chars[i]; - } - } - row->rsize = i_render; -} - -void editorInsertRow(int at, char *s, size_t len) { +void editorInsertRow(int at, char *s, int len) { if (at < 0 || at > E.numrows) { return; } - erow *tmp = (erow *)realloc(E.row, sizeof(erow) * (E.numrows + 1)); + row_t *tmp = (row_t *)realloc(E.rows, sizeof(row_t) * (E.numrows + 1)); if (!tmp) { return; } - E.row = tmp; - memmove(&E.row[at + 1], &E.row[at], sizeof(erow) * (E.numrows - at)); + E.rows = tmp; + memmove(&E.rows[at + 1], &E.rows[at], sizeof(row_t) * (E.numrows - at)); // Initialize the new row - E.row[at].size = 0; - E.row[at].chars = NULL; - E.row[at].rsize = 0; - E.row[at].render = NULL; + E.rows[at].size = len; + E.rows[at].chars = NULL; - // Count UTF-8 characters first - int char_count = 0; - int i = 0; - while (i < len) { - unsigned char first = (unsigned char)s[i]; - int char_len; - - if ((first & 0x80) == 0) { - char_len = 1; - } else if ((first & 0xE0) == 0xC0) { - char_len = 2; - } else if ((first & 0xF0) == 0xE0) { - char_len = 3; - } else if ((first & 0xF8) == 0xF0) { - char_len = 4; - } else { - char_len = 1; // Invalid, treat as single byte - } - - i += char_len; - char_count++; - } + E.rows[at].cap = len + 1; + E.rows[at].chars = malloc(E.rows[at].cap); - // Allocate for the actual number of characters - if (char_count > 0) { - E.row[at].chars = malloc(sizeof(utf_8_char_t) * char_count); - if (!E.row[at].chars) { - return; - } - } - - // Now convert to utf_8_char_t array - i = 0; - E.row[at].size = 0; - while (i < len && E.row[at].size < char_count) { - utf_8_char_t ch; - - unsigned char first = (unsigned char)s[i]; - if ((first & 0x80) == 0) { - ch.len = 1; - } else if ((first & 0xE0) == 0xC0) { - ch.len = 2; - } else if ((first & 0xF0) == 0xE0) { - ch.len = 3; - } else if ((first & 0xF8) == 0xF0) { - ch.len = 4; - } else { - ch.len = 1; - } - - // Copy bytes - for (int j = 0; j < ch.len && i < len; j++) { - ch.c[j] = s[i++]; - } - - E.row[at].chars[E.row[at].size++] = ch; - } + memcpy(E.rows[at].chars, s, len); + E.rows[at].chars[len] = '\n'; - editorUpdateRow(&E.row[at]); - - ++E.numrows; ++E.dirty; } -void editorFreeRow(erow *row) { - free(row->render); +void editorFreeRow(row_t *row) { free(row->chars); } -void editorDelRow(int at) { - if (at < 0 || at >= E.numrows) { - return; +int editorRowCxToByte(const row_t *row, int cursor_x) { + int i = 0, col = 0; + while (col < cursor_x && i < row->size) { + int sl = utf8Seqlen((unsigned char)row->chars[i]); + if (sl < 1) sl = 1; + col++; + i += sl; } - editorFreeRow(&E.row[at]); - memmove(&E.row[at], &E.row[at + 1], sizeof(erow) * (E.numrows - at - 1)); - --E.numrows; - ++E.dirty; + return i; } /** * \fn editorRowInsertChar(erow *row, int at, int c) * \param at Index of where we want to insert the char */ -void editorRowInsertChar(erow *row, int at, utf_8_char_t c) { +void editorRowInsertBytes(row_t *row, int at, const char *src, int n) { if (E.state == READ_ONLY) return; - if (at < 0 || at > row->size) { - at = row->size; + if (row->size + n + 1 > row->cap) { + row->cap = (row->size + n + 1) * 2; + row->chars = realloc(row->chars, row->cap); } - row->chars = realloc(row->chars, row->size + 1); - memmove(&row->chars[at + 1], &row->chars[at], row->size - at + 1); - ++(row->size); - row->chars[at] = c; - fprintf(stderr, "Row insert : %s %d\n", c.c, c.len); - editorUpdateRow(row); - ++E.dirty; -} - -void editorRowAppendString(erow *row, char *s, size_t len) { - row->chars = realloc(row->chars, row->size + len + 1); - memcpy(&row->chars[row->size], s, len); - row->size += len; - editorUpdateRow(row); + memmove(row->chars + at + n, row->chars + at, row->size - at); + memcpy(row->chars + at, src, n); + row->size += n; + row->chars[row->size] = '\0'; ++E.dirty; } @@ -221,12 +73,19 @@ void editorRowAppendString(erow *row, char *s, size_t len) { * \brief Delete the a char at the chosen position on the given row * \param at Index of the char to delete * \param row Row on operation is made */ -void editorRowDelchar(erow *row, int at) { - if (at < 0 || at >= row->size) { - return; - } - memmove(&row->chars[at], &row->chars[at + 1], row->size - at); - --row->size; - editorUpdateRow(row); - ++E.dirty; +void editorRowDelByte(row_t *row, int at, int n) { + memmove(row->chars + at, row->chars + at + n, row->size - at - n); + row->size -= n; + row->chars[row->size] = '\0'; } + +int editorRowCharCount(row_t *row) +{ + int n = 0, i = 0; + while (i < row->size) { + int sl = utf8Seqlen((unsigned char)row->chars[i]); + if (sl < 1) sl = 1; + n++; i += sl; + } + return n; +} \ No newline at end of file diff --git a/src/terminal.c b/src/terminal.c index 29626ba..15899dc 100644 --- a/src/terminal.c +++ b/src/terminal.c @@ -1,10 +1,16 @@ #include "../include/terminal.h" + +#include + #include "../include/data.h" +#include "../include/define.h" #include #include #include +#include "include/utf8.h" + void die(const char *s) { write(STDOUT_FILENO, "\x1b[2J", 4); write(STDOUT_FILENO, CURSOR_TOP_LEFT, 3); @@ -37,211 +43,97 @@ void enableRawMode() { } } -int utf8_char_length(unsigned char first_byte) { - if ((first_byte & 0x80) == 0) - return 1; // 0xxxxxxx - ASCII - if ((first_byte & 0xE0) == 0xC0) - return 2; // 110xxxxx - 2 bytes - if ((first_byte & 0xF0) == 0xE0) - return 3; // 1110xxxx - 3 bytes - if ((first_byte & 0xF8) == 0xF0) - return 4; // 11110xxx - 4 bytes - return 1; // Invalid, treat as single byte -} +#include /* isprint */ -// Convert UTF-8 to Unicode code point -unsigned int utf8_to_codepoint(const unsigned char *bytes, int len) { - if (len == 1) - return bytes[0]; - if (len == 2) - return ((bytes[0] & 0x1F) << 6) | (bytes[1] & 0x3F); - if (len == 3) - return ((bytes[0] & 0x0F) << 12) | ((bytes[1] & 0x3F) << 6) | - (bytes[2] & 0x3F); - if (len == 4) - return ((bytes[0] & 0x07) << 18) | ((bytes[1] & 0x3F) << 12) | - ((bytes[2] & 0x3F) << 6) | (bytes[3] & 0x3F); - return 0; -} +char *key_to_string(int key) { + static char key_str[32]; -void parse_key(unsigned char *seq, int len, KeyInfo *key) { - memcpy(key->c.c, seq, len); - key->c.len = len; - key->modifiers = MOD_NONE; - key->type = KEY_UNKNOWN; - - // Control characters (Ctrl+A to Ctrl+Z) - if (len == 1 && seq[0] < 32 && seq[0] != 27 && seq[0] != 9 && seq[0] != 10 && - seq[0] != 13) { - key->type = KEY_CTRL; - key->data.ctrl_char = seq[0] + 64; - return; - } - - // Special single characters - if (len == 1) { - switch (seq[0]) { - case 9: - case 10: - case 13: - case 27: - case 127: - key->type = KEY_SPECIAL; - key->data.special = seq[0]; - return; + if (key == '\r') { + strcpy(key_str, "ENTER"); + } else if (key >= 1 && key <= 26) { + snprintf(key_str, sizeof(key_str), "CTRL-%c", 'a' + key - 1); + } else { + switch (key) { + case ARROW_UP: strcpy(key_str, "ARROW-UP"); break; + case ARROW_DOWN: strcpy(key_str, "ARROW-DOWN"); break; + case ARROW_LEFT: strcpy(key_str, "ARROW-LEFT"); break; + case ARROW_RIGHT: strcpy(key_str, "ARROW-RIGHT"); break; + case PAGE_UP: strcpy(key_str, "PAGE-UP"); break; + case PAGE_DOWN: strcpy(key_str, "PAGE-DOWN"); break; + case DEL_KEY: strcpy(key_str, "DEL"); break; + case BACKSPACE: strcpy(key_str, "BACKSPACE"); break; + case BEG_LINE: strcpy(key_str, "HOME"); break; + case END_LINE: strcpy(key_str, "END"); break; + case '\x1b': strcpy(key_str, "ESCAPE"); break; + default: + if (key > 127) { + /* UTF-8 code point — re-encode into the buffer */ + char buf[5] = {0}; + int n = utf8Encode((uint32_t)key, buf); + snprintf(key_str, sizeof(key_str), "%.*s", n, buf); + } else if (isprint(key)) { + snprintf(key_str, sizeof(key_str), "%c", key); + } else { + snprintf(key_str, sizeof(key_str), "KEY-%d", key); + } } } + return key_str; +} - // Escape sequences - if (len >= 2 && seq[0] == 27) { - // Alt+key combinations - if (len == 2 && seq[1] >= 32 && seq[1] < 127) { - key->type = KEY_ALT; - key->data.alt_char = seq[1]; - return; - } +int editorReadKey() { + char c; + /* read first byte — may be start of UTF-8 or escape */ + while (read(STDIN_FILENO, &c, 1) != 1); - // CSI sequences (ESC [ ...) - if (len >= 3 && seq[1] == '[') { - // Arrow keys - if (len == 3) { - switch (seq[2]) { - case 'A': - case 'B': - case 'C': - case 'D': - key->type = KEY_ARROW; - key->data.arrow = seq[2]; - return; - case 'H': - case 'F': - key->type = KEY_NAVIGATION; - key->data.special = seq[2]; - return; - } - } - - // Modified keys (ESC [ 1 ; modifier letter) - if (len >= 6 && seq[2] == '1' && seq[3] == ';') { - int modifier = seq[4] - '0'; - char k = seq[5]; - - if (modifier & 1) - key->modifiers |= MOD_SHIFT; - if (modifier & 2) - key->modifiers |= MOD_ALT; - if (modifier & 4) - key->modifiers |= MOD_CTRL; - - switch (k) { - case 'A': - case 'B': - case 'C': - case 'D': - key->type = KEY_ARROW; - key->data.arrow = k; - return; - case 'H': - case 'F': - key->type = KEY_NAVIGATION; - key->data.special = k; - return; - } - } - - // Function keys and navigation - if (len == 4 && seq[3] == '~') { - int num = seq[2] - '0'; - if (num >= 1 && num <= 6) { - key->type = KEY_NAVIGATION; - key->data.special = seq[2]; - return; - } - } - - if (len == 5 && seq[4] == '~') { - int num = (seq[2] - '0') * 10 + (seq[3] - '0'); - if (num >= 15 && num <= 24) { - key->type = KEY_FUNCTION; - // Map to F5-F12 - int f_map[] = {15, 17, 18, 19, 20, 21, 23, 24}; - for (int i = 0; i < 8; i++) { - if (f_map[i] == num) { - key->data.function_num = i + 5; - return; + if (c == '\x1b') { + char seq[6]; + /* try to read escape sequence */ + if (read(STDIN_FILENO, &seq[0], 1) != 1) return '\x1b'; + if (read(STDIN_FILENO, &seq[1], 1) != 1) return '\x1b'; + if (seq[0] == '[') { + if (seq[1] >= '0' && seq[1] <= '9') { + if (read(STDIN_FILENO, &seq[2], 1) != 1) return '\x1b'; + if (seq[2] == '~') { + switch (seq[1]) { + case '1': return BEG_LINE; + case '3': return DEL_KEY; + case '4': return END_LINE; + case '5': return PAGE_UP; + case '6': return PAGE_DOWN; + case '7': return BEG_LINE; + case '8': return END_LINE; + } + } + } else { + switch (seq[1]) { + case 'A': return ARROW_UP; + case 'B': return ARROW_DOWN; + case 'C': return ARROW_RIGHT; + case 'D': return ARROW_LEFT; + case 'H': return BEG_LINE; + case 'F': return END_LINE; + } } - } } - } + return '\x1b'; } - // SS3 sequences (ESC O ...) - if (len == 3 && seq[1] == 'O') { - switch (seq[2]) { - case 'P': - case 'Q': - case 'R': - case 'S': - key->type = KEY_FUNCTION; - key->data.function_num = seq[2] - 'P' + 1; - return; - case 'H': - case 'F': - key->type = KEY_NAVIGATION; - key->data.special = seq[2]; - return; - } + /* multi-byte UTF-8: read remaining bytes */ + int seqlen = utf8Seqlen((unsigned char)c); + if (seqlen > 1) { + /* pack into a pseudo-codepoint just to pass bytes through; + we handle encoding/decoding at the row level */ + char buf[4] = {c, 0, 0, 0}; + for (int i = 1; i < seqlen; i++) + if (read(STDIN_FILENO, &buf[i], 1) != 1) break; + /* decode and return as uint32, but we need int — use high range */ + const char *p = buf; + uint32_t cp = utf8Decode(&p); + return (int)cp; /* caller re-encodes when inserting */ } - } - // UTF-8 character - if (seq[0] >= 32 || (seq[0] & 0x80)) { - int char_len = utf8_char_length(seq[0]); - fprintf(stderr, "char length : %d\n", char_len); - if (char_len <= len) { - key->type = KEY_CHAR; - memcpy(key->c.c, seq, len); - key->c.len = len; - return; - } - } -} - -KeyInfo *editorReadKey() { - fd_set fds; - int timeout_ms = 10; - struct timeval tv; - int total = 0; - KeyInfo *key = (KeyInfo *)malloc(sizeof(KeyInfo)); - int len; - unsigned char buffer[20]; - - if (read(STDIN_FILENO, &buffer[0], 1) <= 0) - return 0; - - while (total < 20) { - FD_ZERO(&fds); - FD_SET(STDIN_FILENO, &fds); - tv.tv_sec = 0; - tv.tv_usec = timeout_ms * 1000; - - int ret = select(STDIN_FILENO + 1, &fds, NULL, NULL, &tv); - if (ret <= 0) - break; - - if (read(STDIN_FILENO, &buffer[total], 1) <= 0) - break; - total++; - } - total++; - - parse_key(buffer, total, key); - - // DEBUG - - fprintf(stderr, "%s %d %d %s %d\n", buffer, buffer[0], buffer[1], key->c.c, key->c.len); - - return key; + return (unsigned char)c; } int getCursorPosition(int *rows, int *cols) { diff --git a/src/utf8.c b/src/utf8.c new file mode 100644 index 0000000..10db8b3 --- /dev/null +++ b/src/utf8.c @@ -0,0 +1,148 @@ +/** + * @file utf8.c + */ + +#include "../include/utf8.h" +#include "../include/data.h" + +#include +#include + + +uint32_t readUtf8Char(void) +{ + unsigned char buf[4]; + + read(STDIN_FILENO, &buf[0], 1); + + int extra; + uint32_t cp; + + if (buf[0] < 0x80) + { + cp = buf[0]; + extra = 0; + } + else if (buf[0] < 0xC0) { return 0xFFFD; } // stray continuation + else if (buf[0] < 0xE0) + { + cp = buf[0] & 0x1F; + extra = 1; + } + else if (buf[0] < 0xF0) + { + cp = buf[0] & 0x0F; + extra = 2; + } + else + { + cp = buf[0] & 0x07; + extra = 3; + } + + if (extra > 0) + { + read(STDIN_FILENO, &buf[1], extra); // read remaining bytes at once + for (int i = 0; i < extra; i++) + cp = (cp << 6) | (buf[1 + i] & 0x3F); + } + + return cp; +} + +uint32_t utf8Decode(const char** s) +{ + unsigned char c = (unsigned char)**s; + uint32_t cp; + int extra; + if (c < 0x80) + { + cp = c; + extra = 0; + } + else if (c < 0xC0) + { + (*s)++; + return 0xFFFD; + } + else if (c < 0xE0) + { + cp = c & 0x1F; + extra = 1; + } + else if (c < 0xF0) + { + cp = c & 0x0F; + extra = 2; + } + else + { + cp = c & 0x07; + extra = 3; + } + (*s)++; + while (extra--) + { + c = (unsigned char)**s; + if ((c & 0xC0) != 0x80) return 0xFFFD; + cp = (cp << 6) | (c & 0x3F); + (*s)++; + } + return cp; +} + +// buf must have at least 4 bytes; returns bytes written +int utf8Encode(uint32_t cp, char* buf) +{ + if (cp < 0x80) + { + buf[0] = cp; + return 1; + } + if (cp < 0x800) + { + buf[0] = 0xC0 | (cp >> 6); + buf[1] = 0x80 | (cp & 0x3F); + return 2; + } + if (cp < 0x10000) + { + buf[0] = 0xE0 | (cp >> 12); + buf[1] = 0x80 | ((cp >> 6) & 0x3F); + buf[2] = 0x80 | (cp & 0x3F); + return 3; + } + buf[0] = 0xF0 | (cp >> 18); + buf[1] = 0x80 | ((cp >> 12) & 0x3F); + buf[2] = 0x80 | ((cp >> 6) & 0x3F); + buf[3] = 0x80 | (cp & 0x3F); + return 4; +} + +int utf8Seqlen(unsigned char c) +{ + if (c < 0x80) return 1; + if (c < 0xC0) return 0; /* continuation — shouldn't be leading */ + if (c < 0xE0) return 2; + if (c < 0xF0) return 3; + return 4; +} + +/** + * @param codepoint utf8 codepoint of a char + * @return length of the codepoint + */ +int codepointWidth(uint32_t codepoint) +{ + if (codepoint < 0x20 || codepoint == 0x7F) return 0; + /* rough double-width ranges */ + if ((codepoint >= 0x1100 && codepoint <= 0x115F) || + (codepoint >= 0x2E80 && codepoint <= 0x303E) || + (codepoint >= 0x3041 && codepoint <= 0x33BF) || + (codepoint >= 0xAC00 && codepoint <= 0xD7AF) || + (codepoint >= 0xF900 && codepoint <= 0xFAFF) || + (codepoint >= 0xFF01 && codepoint <= 0xFF60) || + (codepoint >= 0x1F300 && codepoint <= 0x1FAFF)) + return 2; + return 1; +}