From 8e1b4d2f8652020ca7079897b195ccd31e4da2e9 Mon Sep 17 00:00:00 2001 From: arthur barraux Date: Sun, 3 May 2026 23:32:40 +0200 Subject: [PATCH] utf8 processing without struct --- include/append_buffer.h | 2 +- include/builtins.h | 2 - include/data.h | 37 +---- include/define.h | 13 +- include/editor_op.h | 4 +- include/file_io.h | 3 - include/input.h | 6 +- include/row_op.h | 18 +-- include/terminal.h | 6 +- include/utf8.h | 16 ++ install.sh | 0 main.c | 3 - meson.build | 1 + src/append_buffer.c | 6 +- src/builtins.c | 69 +++------ src/editor_op.c | 114 ++++++--------- src/file_io.c | 186 +++++++++--------------- src/init.c | 3 +- src/input.c | 314 +++++++++++++++++----------------------- src/output.c | 87 ++++++----- src/row_op.c | 227 ++++++----------------------- src/terminal.c | 278 +++++++++++------------------------ src/utf8.c | 148 +++++++++++++++++++ 23 files changed, 637 insertions(+), 906 deletions(-) create mode 100644 include/utf8.h mode change 100755 => 100644 install.sh create mode 100644 src/utf8.c diff --git a/include/append_buffer.h b/include/append_buffer.h index dc266e4..3f28992 100644 --- a/include/append_buffer.h +++ b/include/append_buffer.h @@ -5,7 +5,7 @@ #include #include -void abAppend(struct abuf *ab, const unsigned char *s, int len); +void abAppend(struct abuf *ab, const char *s, int len); void abFree(struct abuf *ab); diff --git a/include/builtins.h b/include/builtins.h index 4c5b3ff..ce946d0 100644 --- a/include/builtins.h +++ b/include/builtins.h @@ -31,8 +31,6 @@ Lisp editorPrintC(Lisp args, LispError *e, LispContext ctx); Lisp addPackage(Lisp args, LispError *e, LispContext ctx); -Lisp editorDelRow_L(Lisp args, LispError *e, LispContext ctx); - Lisp editorFind_L(Lisp args, LispError *e, LispContext ctx); Lisp editorReadChar_L(Lisp args, LispError *e, LispContext ctx); diff --git a/include/data.h b/include/data.h index 68b236a..01d1617 100644 --- a/include/data.h +++ b/include/data.h @@ -8,23 +8,17 @@ #include "lisp.h" -typedef struct{ - unsigned char c[4]; - char len; -} utf_8_char_t; - /** - * \struct erow + * \struct row_t * \brief Store one editor row * \param * */ -typedef struct erow { +typedef struct row { int size; /**< Size of the line */ - int rsize; /**< Size of the render line */ - utf_8_char_t *chars; /**< Characters of the line */ - utf_8_char_t *render; /**< The actual line we will print */ -} erow; + int cap; /**< Size of the render line */ + char *chars; /**< Characters of the line */ +} row_t; enum editorStatus_e { IDLE, @@ -57,24 +51,9 @@ typedef enum { MOD_CTRL = 4 } KeyModifier; -// Key information structure -typedef struct { - KeyType type; - int modifiers; // Bitmask of KeyModifier - union { - unsigned int codepoint; // For KEY_CHAR - char ctrl_char; // For KEY_CTRL (A-Z) - char alt_char; // For KEY_ALT - char arrow; // For KEY_ARROW (U/D/L/R) - int function_num; // For KEY_FUNCTION (1-12) - char special; // For KEY_SPECIAL and KEY_NAVIGATION - } data; - utf_8_char_t c; // Raw bytes -} KeyInfo; - struct keyBind_t { - KeyInfo *key_sequence; + char *key_sequence; Lisp command; }; @@ -90,7 +69,7 @@ struct editorConfig { int screenrows; /**< Terminal height*/ int screencols; /**< Terminal width*/ int numrows; /**< Number of rows contained */ - erow *row; /**< Store all the rows printed */ + row_t *rows; /**< Store all the rows printed */ int dirty; char *filename; enum editorStatus_e state; @@ -118,7 +97,7 @@ struct editorConfig { * */ struct abuf { - unsigned char *b; /**< Text that will be printed */ + char *b; /**< Text that will be printed */ int len; /**< Length of the text */ }; diff --git a/include/define.h b/include/define.h index 17c0ec9..f3f0b18 100644 --- a/include/define.h +++ b/include/define.h @@ -11,7 +11,18 @@ #define TAB "\x09" #define SPACE "\x20" - +enum editorKey_e { + BACKSPACE = 127, + ARROW_LEFT = 1000, + ARROW_RIGHT, + ARROW_UP, + ARROW_DOWN, + DEL_KEY, + BEG_LINE, + END_LINE, + PAGE_UP, + PAGE_DOWN, + }; #define ABUF_INIT {NULL, 0} diff --git a/include/editor_op.h b/include/editor_op.h index a67c7df..104387e 100644 --- a/include/editor_op.h +++ b/include/editor_op.h @@ -2,9 +2,7 @@ #define EDITOR_OP_H_ #include "data.h" -void editorInsertChar(utf_8_char_t *c); - -void editorInsertNewLine(void); +void editorInsertChar(int c); void editorDelChar(); diff --git a/include/file_io.h b/include/file_io.h index f8ee453..7dc9902 100644 --- a/include/file_io.h +++ b/include/file_io.h @@ -8,9 +8,6 @@ #include #include -char *editorRowsToString(int *buffer_len); - - void editorCloseFile(void); void editorOpen(char *filename); diff --git a/include/input.h b/include/input.h index 8799904..29d35c6 100644 --- a/include/input.h +++ b/include/input.h @@ -22,11 +22,9 @@ char *editorPrompt(char *prompt, char * PlaceHolder, char bPathMode); -char *key_to_string(int key); +void editorMoveCursor(int key); -void editorMoveCursor(KeyInfo * key); - -int executeKeyBind(KeyInfo *key_sequence); +int executeKeyBind(char *key_sequence); /** * \fn void editorProcessKeypress() diff --git a/include/row_op.h b/include/row_op.h index fe350b9..c68f1f7 100644 --- a/include/row_op.h +++ b/include/row_op.h @@ -8,22 +8,16 @@ #include #include -int editorRowCxToRx(erow *row, int cursor_x); +void editorInsertRow(int at, char *s, int len); -int editorRowRxToCx(erow *row, int rx); +void editorFreeRow(row_t *row); -void editorUpdateRow(erow *row); +int editorRowCxToByte(const row_t *row, int cursor_x); -void editorInsertRow(int at, char *s, size_t len); +int editorRowCharCount(row_t *row); -void editorFreeRow(erow *row); +void editorRowInsertBytes(row_t *row, int at, const char *src, int len); -void editorDelRow(int at); - -void editorRowInsertChar(erow *row, int at, utf_8_char_t c); - -void editorRowAppendString(erow *row, char *s, size_t len); - -void editorRowDelchar(erow *row, int at); +void editorRowDelByte(row_t *row, int at, int n); #endif // ROW_OP_H_ diff --git a/include/terminal.h b/include/terminal.h index d61a569..416b7fa 100644 --- a/include/terminal.h +++ b/include/terminal.h @@ -25,12 +25,12 @@ void disableRawMode(); void enableRawMode(); -KeyInfo * editorReadKey(); +int editorReadKey(); int getCursorPosition(int *rows, int *cols); -KeyInfo *stringToCodepoint(const char *string); - int getWindowSize(int *rows, int *cols); +char *key_to_string(int key); + #endif diff --git a/include/utf8.h b/include/utf8.h new file mode 100644 index 0000000..2c3f425 --- /dev/null +++ b/include/utf8.h @@ -0,0 +1,16 @@ +// +// Created by Giorgio on 01/05/2026. +// + +#ifndef BELUGA_UTF8_H +#define BELUGA_UTF8_H +#include + +uint32_t readUtf8Char(void); +int utf8Encode(uint32_t cp, char *buf); +int utf8Seqlen(unsigned char c); +int codepointWidth(uint32_t codepoint); +uint32_t utf8Decode(const char** s); + + +#endif //BELUGA_UTF8_H diff --git a/install.sh b/install.sh old mode 100755 new mode 100644 diff --git a/main.c b/main.c index 2cd2716..87451df 100644 --- a/main.c +++ b/main.c @@ -30,15 +30,12 @@ int main(int argc, char *argv[]) { char * splash_screen = (char *) calloc(256, sizeof(char)); - // Set support for utf-8 - setlocale(LC_ALL, ""); // INIT enableRawMode(); initEditor(); if (argc >= 2) { - E.state = READ_AND_WRITE; editorOpen(argv[1]); } else { strcat(splash_screen, getenv("HOME")); diff --git a/meson.build b/meson.build index 37ebd43..1a62b17 100644 --- a/meson.build +++ b/meson.build @@ -20,6 +20,7 @@ src_files = files( 'src/row_op.c', 'src/terminal.c', 'src/builtins.c', + 'src/utf8.c' ) # Executable diff --git a/src/append_buffer.c b/src/append_buffer.c index 6afb831..38cdc05 100644 --- a/src/append_buffer.c +++ b/src/append_buffer.c @@ -1,9 +1,7 @@ #include "../include/append_buffer.h" -extern struct editorConfig E; - -void abAppend(struct abuf *ab, const unsigned char *s, int len) { - unsigned char *new = realloc(ab->b, ab->len + len); +void abAppend(struct abuf *ab, const char *s, int len) { + char *new = realloc(ab->b, ab->len + len); if (new == NULL) { return; diff --git a/src/builtins.c b/src/builtins.c index b580197..28e69ad 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -5,31 +5,22 @@ #include "../include/editor_op.h" #include "../include/row_op.h" #include "../include/data.h" -#include "../include/terminal.h" #include #include #include -utf_8_char_t make_utf8_char(const char *bytes, int len) { - utf_8_char_t ch; - ch.len = len; - memcpy(ch.c, bytes, len); - return ch; -} - Lisp mapKey(Lisp args, LispError *e, LispContext ctx) { - const char *key_string = lisp_string(lisp_car(args)); - KeyInfo *key = stringToCodepoint(key_string); + const char *key_sequence = lisp_string(lisp_car(args)); args = lisp_cdr(args); // second argument Lisp func = lisp_car(args); E.key_binds = (struct keyBind_t *)realloc(E.key_binds, ++E.number_of_keybinds * sizeof(struct keyBind_t)); - E.key_binds[E.number_of_keybinds - 1].key_sequence = (KeyInfo *) malloc(sizeof(KeyInfo)); + E.key_binds[E.number_of_keybinds - 1].key_sequence = (char *) malloc(50 * sizeof(char)); - memcpy(E.key_binds[E.number_of_keybinds - 1].key_sequence, key, sizeof(KeyInfo)); + strncpy(E.key_binds[E.number_of_keybinds - 1].key_sequence, key_sequence, 50); E.key_binds[E.number_of_keybinds - 1].command = func; @@ -38,30 +29,26 @@ Lisp mapKey(Lisp args, LispError *e, LispContext ctx) { Lisp moveCursor(Lisp args, LispError *e, LispContext ctx) { const char *direction = lisp_string(lisp_car(args)); - KeyInfo key; - key.type = KEY_ARROW; switch (direction[0]) { case 'u': - key.data.arrow = 'A'; + editorMoveCursor(ARROW_UP); break; case 'd': - key.data.arrow = 'B'; + editorMoveCursor(ARROW_DOWN); break; case 'r': - key.data.arrow = 'C'; + editorMoveCursor(ARROW_RIGHT); break; case 'l': - key.data.arrow = 'D'; + editorMoveCursor(ARROW_LEFT); break; } - editorMoveCursor(&key); return lisp_null(); } Lisp editorQuit(Lisp args, LispError* e, LispContext ctx) { - fprintf(stderr, "quit\n"); if (E.dirty && E.quit_times_buffer > 0) { editorSetStatusMessage("WARNING! Changes hasn't been saved. Press Ctrl-Q " "another time to quit."); @@ -75,26 +62,27 @@ Lisp editorQuit(Lisp args, LispError* e, LispContext ctx) { return lisp_null(); - + } Lisp l_editorSave(Lisp args, LispError* e, LispContext ctx) { editorSave(); - + return lisp_null(); - + } Lisp l_editorInsertNewLine(Lisp args, LispError* e, LispContext ctx) { - // editorInsertNewLine(); - + editorInsertRow(E.numrows,"", 0); + editorMoveCursor(ARROW_DOWN); + return lisp_null(); - + } Lisp moveCursorBeginLine(Lisp args, LispError *e, LispContext ctx) { @@ -104,7 +92,7 @@ Lisp moveCursorBeginLine(Lisp args, LispError *e, LispContext ctx) { Lisp moveCursorEndLine(Lisp args, LispError* e, LispContext ctx) { if (E.cursor_y < E.numrows) { - E.cursor_x = E.row[E.cursor_y].size; + E.cursor_x = E.rows[E.cursor_y].size; } return lisp_null(); } @@ -118,11 +106,8 @@ Lisp deletePreviousChar(Lisp args, LispError* e, LispContext ctx) { Lisp editorMoveCursorPageUp(Lisp args, LispError* e, LispContext ctx) { E.cursor_y = E.row_offset; int times = E.screenrows; - KeyInfo key; - key.type = KEY_ARROW; - key.data.arrow = 'D'; while (--times) { - editorMoveCursor(&key); + editorMoveCursor(ARROW_UP); } return lisp_null(); } @@ -133,11 +118,8 @@ Lisp editorMoveCursorPageDown(Lisp args, LispError* e, LispContext ctx) { E.cursor_y = E.numrows; } int times = E.screenrows; - KeyInfo key; - key.type = KEY_ARROW; - key.data.arrow = 'D'; while (--times) { - editorMoveCursor(&key); + editorMoveCursor(ARROW_DOWN); } return lisp_null(); @@ -153,9 +135,8 @@ Lisp editorOpenFile(Lisp args, LispError *e, LispContext ctx) { Lisp editorPrintC(Lisp args, LispError *e, LispContext ctx) { - char *c = lisp_string(lisp_car(args)); - utf_8_char_t ch = make_utf8_char(c, 1); - editorInsertChar(&ch); + char c = lisp_string(lisp_car(args))[0]; + editorInsertChar(c); return lisp_null(); } @@ -172,14 +153,9 @@ Lisp addPackage(Lisp args, LispError *e, LispContext ctx) { E.ctx); fclose(fd_package); free(package_dir); - - return lisp_null(); - -} -Lisp editorDelRow_L(Lisp args, LispError *e, LispContext ctx) { - editorDelRow(E.cursor_y); return lisp_null(); + } Lisp editorFind_L(Lisp args, LispError *e, LispContext ctx) { @@ -188,8 +164,7 @@ Lisp editorFind_L(Lisp args, LispError *e, LispContext ctx) { } Lisp editorReadChar_L(Lisp args, LispError *e, LispContext ctx) { - // fprintf(stderr, "char read : %c\n", E.row[E.cursor_y].render[E.cursor_x]); - // return lisp_make_char(E.row[E.cursor_y].render[E.cursor_x]); - return lisp_null(); + fprintf(stderr, "char read : %c\n", E.rows[E.cursor_y].chars[E.cursor_x]); + return lisp_make_char(E.rows[E.cursor_y].chars[E.cursor_x]); } diff --git a/src/editor_op.c b/src/editor_op.c index cbc5921..a7012b8 100644 --- a/src/editor_op.c +++ b/src/editor_op.c @@ -1,94 +1,72 @@ #include "../include/editor_op.h" #include "../include/row_op.h" -#include "include/data.h" +#include "../include/data.h" #include +#include "../include/utf8.h" + extern struct editorConfig E; -void editorInsertChar(utf_8_char_t *c) { +void editorInsertChar(int c) { if (E.state == READ_ONLY) return; - fprintf(stderr, "Insert char %s %d\n", c->c, c->len); - // If cursor is past end of file, add empty rows - if (E.cursor_y == E.numrows) { + if (E.cursor_y == E.numrows) editorInsertRow(E.numrows, "", 0); + row_t *row = &E.rows[E.cursor_y]; + int byte = editorRowCxToByte(row, E.cursor_x); + char buf[4]; + int n; + if (c < 0x80) { + buf[0] = c; + n = 1; + } else { + n = utf8Encode((uint32_t)c, buf); } - - // Insert character at cursor position - editorRowInsertChar(&E.row[E.cursor_y], E.cursor_x, *c); + editorRowInsertBytes(row, byte, buf, n); E.cursor_x++; + E.dirty = 1; } -void editorInsertNewline(void) { +void editorInsertNewline(const char* s, int len) { if (E.state == READ_ONLY) return; - if (E.cursor_x == 0) { - // Insert blank line before current line - editorInsertRow(E.cursor_y, "", 0); - } else { - // Split current line at cursor - erow *row = &E.row[E.cursor_y]; - - // Calculate byte length of remaining part - int remaining_chars = row->size - E.cursor_x; - - // Allocate buffer for remaining characters - char *buf = malloc(remaining_chars * 4); // Max 4 bytes per UTF-8 char - int buf_len = 0; - - // Convert utf_8_char_t to bytes - for (int i = E.cursor_x; i < row->size; i++) { - for (int j = 0; j < row->chars[i].len; j++) { - buf[buf_len++] = row->chars[i].c[j]; - } - } - - // Insert new row with remaining text - editorInsertRow(E.cursor_y + 1, buf, buf_len); - free(buf); - - // Truncate current row at cursor - row = &E.row[E.cursor_y]; // Refresh pointer after realloc - row->size = E.cursor_x; - editorUpdateRow(row); - } - - E.cursor_y++; - E.cursor_x = 0; + E.rows = realloc(E.rows, sizeof(row_t) * (E.numrows + 1)); + row_t *r = &E.rows[E.numrows]; + r->cap = len + 1; + r->chars = malloc(r->cap); + memcpy(r->chars, s, len); + r->size = len; + r->chars[len] = '\0'; + E.numrows++; } -void editorRowAppendRow(erow *dest, erow *src) { - // Allocate space for combined rows - utf_8_char_t *new_chars = realloc(dest->chars, - sizeof(utf_8_char_t) * (dest->size + src->size)); - if (!new_chars) return; - - dest->chars = new_chars; - - // Copy source row characters - memcpy(&dest->chars[dest->size], src->chars, sizeof(utf_8_char_t) * src->size); - dest->size += src->size; - - editorUpdateRow(dest); - ++E.dirty; -} - -void editorDelChar(void) { +void editorDelChar(void) +{ if (E.state == READ_ONLY) return; if (E.cursor_y == E.numrows) return; if (E.cursor_x == 0 && E.cursor_y == 0) return; - - erow *row = &E.row[E.cursor_y]; - + + row_t *r = &E.rows[E.cursor_y]; if (E.cursor_x > 0) { - // Delete character before cursor - editorRowDelchar(row, E.cursor_x - 1); + /* find byte of previous char */ + int byte = editorRowCxToByte(r, E.cursor_x); + /* step back one character */ + int start = byte; + /* walk from beginning to find start of char at cx-1 */ + start = editorRowCxToByte(r, E.cursor_x - 1); + editorRowDelByte(r, start, byte - start); E.cursor_x--; + E.dirty = 1; } else { - // At beginning of line - join with previous line - E.cursor_x = E.row[E.cursor_y - 1].size; - editorRowAppendRow(&E.row[E.cursor_y - 1], row); - editorDelRow(E.cursor_y); + /* merge with previous row */ + row_t *prev = &E.rows[E.cursor_y - 1]; + E.cursor_x = editorRowCharCount(prev); + editorRowInsertBytes(prev, prev->size, r->chars, r->size); + free(r->chars); + memmove(&E.rows[E.cursor_y], &E.rows[E.cursor_y + 1], + sizeof(row_t) * (E.numrows - E.cursor_y - 1)); + E.numrows--; E.cursor_y--; + E.dirty = 1; } } diff --git a/src/file_io.c b/src/file_io.c index 811d01a..a35233d 100644 --- a/src/file_io.c +++ b/src/file_io.c @@ -9,60 +9,25 @@ #include #include -extern char *strdup(const char *); -extern ssize_t getline(char **restrict lineptr, size_t *restrict n, - FILE *restrict stream); -extern int ftruncate(int fd, off_t length); extern struct editorConfig E; -// Convert utf_8_char_t array to byte string -char *editorRowsToString(int *buffer_len) { - int tot_len = 0; - int j, i; - char *buf; - char *p; - // Calculate total byte length (not character count) - for (j = 0; j < E.numrows; ++j) { - // Count actual bytes in each character - for (i = 0; i < E.row[j].size; i++) { - tot_len += E.row[j].chars[i].len; - } - tot_len++; // For newline - } - - *buffer_len = tot_len; - buf = malloc(tot_len); - if (!buf) return NULL; - - p = buf; - for (j = 0; j < E.numrows; ++j) { - // Copy each character's bytes - for (i = 0; i < E.row[j].size; i++) { - for (int k = 0; k < E.row[j].chars[i].len; k++) { - *p++ = E.row[j].chars[i].c[k]; - } - } - *p++ = '\n'; - } - - return buf; -} - -void editorCloseFile(void) { +void editorCloseFile(void) +{ // Free all rows - for (int i = 0; i < E.numrows; i++) { - editorFreeRow(&E.row[i]); + for (int i = 0; i < E.numrows; i++) + { + editorFreeRow(&E.rows[i]); } - + E.cursor_x = 0; E.cursor_y = 0; E.rx = 0; E.row_offset = 0; E.col_offset = 0; E.numrows = 0; - free(E.row); - E.row = NULL; + free(E.rows); + E.rows = NULL; E.dirty = 0; free(E.filename); E.filename = NULL; @@ -70,11 +35,13 @@ void editorCloseFile(void) { E.status_msg_time = 0; } -void editorOpen(char *filename) { - FILE *fp; +void editorOpen(char* filename) +{ + FILE* fp; // Test if a file is already open - if (E.filename != NULL) { + if (E.filename != NULL) + { editorCloseFile(); } E.state = READ_AND_WRITE; @@ -82,136 +49,117 @@ void editorOpen(char *filename) { E.filename = strdup(filename); fp = fopen(filename, "r"); - if (!fp) { + fprintf(stderr, "reading file %s\n", filename); + if (!fp) + { // File doesn't exist - that's okay, we'll create it on save E.dirty = 0; return; } - char *line = NULL; - size_t line_cap = 0; - ssize_t line_len; + char* line = NULL; + size_t line_len; - while ((line_len = getline(&line, &line_cap, fp)) != -1) { + while ((line_len = getline(&line, &line_len, fp)) != -1) + { + fprintf(stderr, "%s %d", line, line_len); // Strip newline characters while (line_len > 0 && - (line[line_len - 1] == '\n' || line[line_len - 1] == '\r')) { + (line[line_len - 1] == '\n' || line[line_len - 1] == '\r')) + { --line_len; } + fprintf(stderr, "len %d\n", line_len); // editorInsertRow will convert bytes to utf_8_char_t - editorInsertRow(E.numrows, line, line_len); + fprintf(stderr, "row number : %d\n", E.numrows); + editorInsertRow(E.numrows, line, (int) line_len); } free(line); fclose(fp); E.dirty = 0; } -void editorSave() { +void editorSave() +{ int len; - char *buf; + char* buf; int fd; - - if (E.filename == NULL) { + + if (E.filename == NULL) + { E.filename = editorPrompt("Save as: %s (ESC to cancel)", "", 1); - if (E.filename == NULL) { + if (E.filename == NULL) + { editorSetStatusMessage("Save aborted"); return; } } - - buf = editorRowsToString(&len); - if (!buf) { - editorSetStatusMessage("Can't save! Memory error"); - return; - } - + fd = open(E.filename, O_RDWR | O_CREAT | O_TRUNC, 0644); - if (fd != -1) { - if (write(fd, buf, len) == len) { - close(fd); - free(buf); - E.dirty = 0; - editorSetStatusMessage("%d bytes written to disk", len); - return; - } - close(fd); + for (int i = 0; i < E.numrows; i++) + { + write(fd, E.rows[i].chars, E.rows[i].size); + write(fd, "\n", 1); + // fputc('\n', fp); } - - free(buf); - editorSetStatusMessage("Can't save! I/O error: %s", strerror(errno)); + close(fd); + + E.dirty = 0; + editorSetStatusMessage("%d bytes written to disk", len); } -// Helper to convert utf_8_char_t array to byte string for searching -static char *row_to_string(erow *row) { - // Calculate byte length - int byte_len = 0; - for (int i = 0; i < row->rsize; i++) { - byte_len += row->render[i].len; - } - - char *str = malloc(byte_len + 1); - if (!str) return NULL; - - // Convert to bytes - int pos = 0; - for (int i = 0; i < row->rsize; i++) { - for (int j = 0; j < row->render[i].len; j++) { - str[pos++] = row->render[i].c[j]; - } - } - str[pos] = '\0'; - - return str; -} - -void editorFind() { - char *query = editorPrompt("Search: %s (ESC to cancel)", "", 0); +void editorFind() +{ + char* query = editorPrompt("Search: %s (ESC to cancel)", "", 0); if (query == NULL) return; - + int saved_cursor_x = E.cursor_x; int saved_cursor_y = E.cursor_y; int saved_row_offset = E.row_offset; int saved_col_offset = E.col_offset; - +#if 0 // Search from current position forward - for (int i = E.cursor_y; i < E.numrows; i++) { - erow *row = &E.row[i]; - + for (int i = E.cursor_y; i < E.numrows; i++) + { + row_t* row = &E.rows[i]; + // Convert row to byte string for searching - char *render_str = row_to_string(row); + char* render_str = row_to_string(row); if (!render_str) continue; - - char *match = strstr(render_str, query); - if (match) { + + char* match = strstr(render_str, query); + if (match) + { E.cursor_y = i; - + // Find the character index from byte position int byte_pos = match - render_str; int char_idx = 0; int current_byte = 0; - - for (char_idx = 0; char_idx < row->rsize; char_idx++) { + + for (char_idx = 0; char_idx < row->rsize; char_idx++) + { if (current_byte >= byte_pos) break; current_byte += row->render[char_idx].len; } - + E.cursor_x = editorRowRxToCx(row, char_idx); E.row_offset = E.numrows; // Force scroll - + free(render_str); free(query); return; } - + free(render_str); } - + // Not found - restore cursor position E.cursor_x = saved_cursor_x; E.cursor_y = saved_cursor_y; E.row_offset = saved_row_offset; E.col_offset = saved_col_offset; - +#endif editorSetStatusMessage("Not found: %s", query); free(query); } diff --git a/src/init.c b/src/init.c index a0e5ac0..acb18a2 100644 --- a/src/init.c +++ b/src/init.c @@ -32,7 +32,6 @@ void initBuiltins() { registerBuiltin("EDITOR-OPEN-FILE", editorOpenFile); registerBuiltin("EDITOR-INSERT-CHAR", editorPrintC); registerBuiltin("ADD-PACKAGE", addPackage); - registerBuiltin("EDITOR-DEL-ROW", editorDelRow_L); registerBuiltin("EDITOR-FIND", editorFind_L); registerBuiltin("EDITOR-READ-CHAR", editorReadChar_L); } @@ -45,7 +44,7 @@ void initEditor() { E.row_offset = 0; E.col_offset = 0; E.numrows = 0; - E.row = NULL; + E.rows = NULL; E.dirty = 0; E.filename = NULL; E.state = READ_ONLY; diff --git a/src/input.c b/src/input.c index 0632fff..a85f273 100644 --- a/src/input.c +++ b/src/input.c @@ -13,236 +13,182 @@ #include extern struct editorConfig E; - -char *file_completion(const char *path) { - DIR *dir; - struct dirent *entry; - char directory[128]; - char predict[128]; - int predict_len = 0; +char * file_completion(const char *path) { + DIR * dir; + struct dirent *entry; + char directory[128]; + char predict[128]; + int predict_len = 0; if (path[strlen(path) - 1] == '/') { - return strdup(path); + return path; } - // Find dir name - char *last_slash = strrchr(path, '/'); - if (last_slash) { - size_t dir_len = last_slash - path + 1; - strncpy(directory, path, dir_len); - predict_len = strlen(path) - dir_len; - strncpy(predict, last_slash + 1, predict_len); - directory[dir_len] = '\0'; - predict[predict_len] = '\0'; - } else { - return NULL; - } + // Find dir name + char * last_slash = strrchr(path, '/'); + if (last_slash) { + size_t dir_len = last_slash - path + 1; // length of dir_path + strncpy(directory, path, dir_len); + predict_len = strlen(path) - dir_len - 1; + strncpy(predict, last_slash + 1, predict_len); + directory[dir_len] = '\0'; + predict[predict_len] = '\0'; + fprintf(stderr, "%s %s\n", directory, predict); + } else { + return NULL; + } + dir = opendir(directory); if (!dir) + return NULL; + + while ((entry = readdir(dir)) != NULL) { + if (strncmp(entry->d_name, predict, predict_len) == 0) { + static char full_path[128]; + snprintf(full_path, sizeof(full_path), "%s%s", directory, entry->d_name); + + struct stat st; + if (stat(full_path, &st) == 0 && S_ISDIR(st.st_mode)) { + strcat(full_path, "/"); // add slash for directories + } + + return strdup(full_path); + } + } + + // Cleanup when no more entries + closedir(dir); + dir = NULL; return NULL; - while ((entry = readdir(dir)) != NULL) { - if (strncmp(entry->d_name, predict, predict_len) == 0) { - static char full_path[128]; - snprintf(full_path, sizeof(full_path), "%s%s", directory, entry->d_name); - - struct stat st; - if (stat(full_path, &st) == 0 && S_ISDIR(st.st_mode)) { - strcat(full_path, "/"); - } - closedir(dir); - return strdup(full_path); - } - } - - closedir(dir); - return NULL; } /** * \fn char * editorPrompt(struct editorConfig *E, char *prompt, char bPathMode) * \brief Return user input in a prompt when enter is hit. */ -char *editorPrompt(char *prompt, char *placeHolder, char bPathMode) { +char *editorPrompt(char *prompt, char * placeHolder, char bPathMode) { size_t buf_size = 128; char *buf = malloc(buf_size); size_t buf_len = 0; + int c = 0; buf[0] = '\0'; - strcpy(buf, placeHolder); - buf_len = strlen(placeHolder); + strcpy(buf, placeHolder); + buf_len = strlen(placeHolder); while (1) { - editorSetStatusMessage(prompt, buf); + editorSetStatusMessage(prompt, buf); editorRefreshScreen(); - - KeyInfo *key = editorReadKey(); - - // Handle backspace/delete - if (key->type == KEY_SPECIAL && (key->data.special == 127 || key->data.special == 8)) { + c = editorReadKey(); + if (c == DEL_KEY || c == CTRL_KEY('h') || c == BACKSPACE) { if (buf_len != 0) { buf[--buf_len] = '\0'; } - } - // Handle Ctrl+H (backspace) - else if (key->type == KEY_CTRL && key->data.ctrl_char == 'H') { - if (buf_len != 0) { - buf[--buf_len] = '\0'; - } - } - // Handle ESC - else if (key->type == KEY_SPECIAL && key->data.special == 27) { + } else if (c == ESCAPE) { editorSetStatusMessage(""); free(buf); return NULL; - } - // Handle Enter - else if (key->type == KEY_SPECIAL && (key->data.special == 13 || key->data.special == 10)) { + } else if (c == '\r') { if (buf_len != 0) { editorSetStatusMessage(""); return buf; } - } - // Handle Tab for path completion - else if (bPathMode && key->type == KEY_SPECIAL && key->data.special == 9) { - char path[128]; - char *pwd; - if (buf[0] != '/') { - pwd = getenv("PWD"); - snprintf(path, sizeof(path), "%s/%s", pwd, buf); - } else { - strcpy(path, buf); - } - - char *completion = file_completion(path); - if (completion) { - memset(buf, 0, buf_size); - strcpy(buf, completion); - buf_len = strlen(buf); - free(completion); - } - } - // Handle regular characters (ASCII only for prompts) - else if (key->type == KEY_CHAR && key->data.codepoint < 128) { + } else if (bPathMode && c == '\t') { + char path[128]; + char * pwd; + if (buf[0] != '/') { + pwd = getenv("PWD"); + fprintf(stderr, "%s\n", pwd); + memcpy(path, pwd, strlen(pwd)); + path[strlen(pwd)] = '/'; + strncat(path, buf, buf_len); + } else { + strcpy(path, buf); + } + memset(buf, 0, 128); + buf_len = 0; + strcpy(buf, file_completion(path)); + buf_len = strlen(buf); + buf[buf_len] = '\0'; + + } else if (!iscntrl(c) && c < 128) { if (buf_len == buf_size - 1) { buf_size *= 2; buf = realloc(buf, buf_size); } - buf[buf_len++] = (char)key->data.codepoint; + buf[buf_len++] = c; buf[buf_len] = '\0'; } } } -void editorMoveCursor(KeyInfo *key) { - if (key->type != KEY_ARROW) return; - - erow *row = (E.cursor_y >= E.numrows) ? NULL : &E.row[E.cursor_y]; - int row_len; - - switch (key->data.arrow) { - case 'C': // Right - if (row && E.cursor_x < row->size) { - ++E.cursor_x; - } else if (row && E.cursor_x == row->size) { - E.cursor_y++; - E.cursor_x = 0; - } - break; - case 'B': // Down - if (E.cursor_y < E.numrows) { - ++E.cursor_y; - } - break; - case 'A': // Up - if (E.cursor_y != 0) { - --E.cursor_y; - } - break; - case 'D': // Left - if (E.cursor_x != 0) { - --E.cursor_x; - } else if (E.cursor_y > 0) { - --E.cursor_y; - E.cursor_x = E.row[E.cursor_y].size; - } - break; - } +void editorMoveCursor(int key) +{ + row_t *row = (E.cursor_y >= E.numrows) ? NULL : &E.rows[E.cursor_y]; + int row_len; + switch (key) { + case ARROW_RIGHT: + if (row && E.cursor_x < row->size) { + ++E.cursor_x; + } else if (row && E.cursor_x == row->size) { + E.cursor_y++; + E.cursor_x = 0; + } + break; + case ARROW_DOWN: + if (E.cursor_y < E.numrows) { + ++E.cursor_y; + } + break; + case ARROW_UP: + if (E.cursor_y != 0) { + --E.cursor_y; + } + break; + case ARROW_LEFT: + if (E.cursor_x != 0) { + --E.cursor_x; + } else if (E.cursor_y > 0) { + --E.cursor_y; + E.cursor_x = E.rows[E.cursor_y].size; + } + break; + } - row = (E.cursor_y >= E.numrows) ? NULL : &E.row[E.cursor_y]; - row_len = row ? row->size : 0; - if (E.cursor_x > row_len) { - E.cursor_x = row_len; - } + row = (E.cursor_y >= E.numrows) ? NULL : &E.rows[E.cursor_y]; + row_len = row ? row->size : 0; + if (E.cursor_x > row_len) { + E.cursor_x = row_len; + } } -KeyInfo *stringToCodepoint(const char *string) { - KeyInfo *key = (KeyInfo *)malloc(sizeof(KeyInfo)); - // test control key - if (!strncmp("CTRL", string, 4)) { - key->type = KEY_CTRL; - key->data.ctrl_char = toupper(string[6]) + 64; - } else if (!strncmp("ARROW", string, 5)) { - key->type = KEY_ARROW; - if (!strcmp("UP", string + 7)) { - key->data.arrow = 'A'; - } else if (!strcmp("DOWN", string + 7)) { - key->data.arrow = 'B'; - } else if (!strcmp("RIGHT", string + 7)) { - key->data.arrow = 'C'; - } else if (!strcmp("LEFT", string + 7)) { - key->data.arrow = 'D'; - } - } +int executeKeyBind(char *key_sequence) { + int i; + for (i = 0; i < E.number_of_keybinds; ++i) { + if (!strcmp(key_sequence, E.key_binds[i].key_sequence)) { - return key; -} - -static int key_match(KeyInfo *a, KeyInfo *b) { - if (a->type != b->type) return 0; - if (a->modifiers != b->modifiers) return 0; - - switch (a->type) { - case KEY_CTRL: - return toupper(a->data.ctrl_char) == toupper(b->data.ctrl_char); - case KEY_ALT: - return a->data.alt_char == b->data.alt_char; - case KEY_ARROW: - return a->data.arrow == b->data.arrow; - case KEY_FUNCTION: - return a->data.function_num == b->data.function_num; - case KEY_CHAR: - return a->data.codepoint == b->data.codepoint; - case KEY_SPECIAL: - case KEY_NAVIGATION: - return a->data.special == b->data.special; - default: - return 0; - } -} - -int executeKeyBind(KeyInfo *key_sequence) { - for (int i = 0; i < E.number_of_keybinds; ++i) { - fprintf(stderr, "Keybind found\n"); - if (key_match(key_sequence, E.key_binds[i].key_sequence)) { - // Execute the lisp command - lisp_eval(lisp_cons(E.key_binds[i].command, lisp_null(), E.ctx), - &E.ctx_error, E.ctx); - return 1; - } - } - return 0; + fprintf(stderr, "lisp function %s\n", key_sequence); + // It's a symbol, create a function call + lisp_eval(lisp_cons(E.key_binds[i].command, lisp_null(), E.ctx), + &E.ctx_error, E.ctx); + return 1; + } + } + return 0; } void editorProcessKeypress() { - KeyInfo *key = editorReadKey(); - if (!key) - return; + int c = editorReadKey(); + char* key_sequence; - if (executeKeyBind(key)) { - fprintf(stderr, "Keybinds found\n"); - return; - } - editorInsertChar(&key->c); - E.quit_times_buffer = E.constantes.QUIT_TIMES; -} + key_sequence = key_to_string(c); + fprintf(stderr, "%s\n", key_sequence); + + if (executeKeyBind(key_to_string(c))) { + return; + } + editorInsertChar(c); + E.quit_times_buffer = E.constantes.QUIT_TIMES; + +} \ No newline at end of file diff --git a/src/output.c b/src/output.c index b337628..b58c6dd 100644 --- a/src/output.c +++ b/src/output.c @@ -4,18 +4,7 @@ #include #include -extern struct editorConfig E; - -static void utf8_to_bytes(utf_8_char_t *chars, int count, unsigned char *output, int *output_len) { - int pos = 0; - for (int i = 0; i < count; i++) { - for (int j = 0; j < chars[i].len; j++) { - output[pos++] = chars[i].c[j]; - } - fprintf(stderr, "bytes length : %s %d\n", chars[i].c, pos); - } - *output_len = pos; -} +#include "include/utf8.h" void editorDrawRows(struct abuf *ab) { int y; @@ -24,6 +13,8 @@ void editorDrawRows(struct abuf *ab) { int padding; int len; int file_row; + row_t *row; + for (y = 0; y < E.screenrows; ++y) { file_row = y + E.row_offset; if (file_row >= E.numrows) { @@ -47,20 +38,22 @@ void editorDrawRows(struct abuf *ab) { abAppend(ab, "~", 1); } } else { - len = E.row[file_row].rsize - E.col_offset; - if (len < 0) - len = 0; - if (len > E.screencols) - len = E.screencols; - if (len > 0) { - unsigned char *display_buf = malloc(len * 4); // Max 4 bytes per char - int byte_len; - - utf8_to_bytes(&E.row[file_row].render[E.col_offset], len, display_buf, - &byte_len); - abAppend(ab, display_buf, byte_len); - fprintf(stderr, "display buffer : %s %d\n", display_buf, byte_len); - free(display_buf); + int rx = 0, i = 0; + int rendered = 0; + row_t *row = &E.rows[E.row_offset + y]; + while (i < row->size && rendered < E.screencols) { + int sl = utf8Seqlen((unsigned char)row->chars[i]); + if (sl < 1) sl = 1; + const char *p = row->chars + i; + uint32_t cp = utf8Decode(&p); + int w = codepointWidth(cp); if (w == 0) w = 1; + if (rx >= E.col_offset) { + if (rendered + w > E.screencols) break; + abAppend(ab, row->chars + i, sl); + rendered += w; + } + rx += w; + i += sl; } } abAppend(ab, ERASE_END_LINE, 3); @@ -68,24 +61,29 @@ void editorDrawRows(struct abuf *ab) { } } -void editorScroll() { - E.rx = E.cursor_x; - if (E.cursor_y < E.numrows) { - E.rx = editorRowCxToRx(&E.row[E.cursor_y], E.cursor_x); +int editorCxToRx(void) { + if (E.cursor_y >= E.numrows) return E.cursor_x; + row_t *row = &E.rows[E.cursor_y]; + int rx = 0, i = 0, col = 0; + while (col < E.cursor_x && i < row->size) { + int sl = utf8Seqlen((unsigned char)row->chars[i]); + if (sl < 1) sl = 1; + const char *p = row->chars + i; + uint32_t cp = utf8Decode(&p); + int w = codepointWidth(cp); + if (w == 0) w = 1; + rx += w; + i += sl; col++; } + return rx; +} - if (E.cursor_y < E.row_offset) { - E.row_offset = E.cursor_y; - } - if (E.cursor_y >= E.row_offset + E.screenrows) { - E.row_offset = E.cursor_y - E.screenrows + 1; - } - if (E.rx < E.col_offset) { - E.col_offset = E.rx; - } - if (E.rx >= E.col_offset + E.screencols) { - E.col_offset = E.rx - E.screencols + 1; - } +void editorScroll() { + E.rx = editorCxToRx(); + if (E.cursor_y < E.row_offset) E.row_offset = E.cursor_y; + if (E.cursor_y >= E.row_offset + E.screenrows) E.row_offset = E.cursor_y - E.screenrows + 1; + if (E.rx < E.col_offset) E.col_offset = E.rx; + if (E.rx >= E.col_offset + E.screencols) E.col_offset = E.rx - E.screencols + 1; } void editorDrawStatusBar(struct abuf *ab) { @@ -130,6 +128,7 @@ void editorRefreshScreen() { editorScroll(); struct abuf ab = ABUF_INIT; char buf[32]; + int len; abAppend(&ab, HIDE_CURSOR, 6); abAppend(&ab, CURSOR_TOP_LEFT, 3); @@ -138,9 +137,9 @@ void editorRefreshScreen() { editorDrawStatusBar(&ab); editorDrawMessageBar(&ab); - snprintf(buf, sizeof(buf), "\x1b[%d;%dH", (E.cursor_y - E.row_offset) + 1, + len = snprintf(buf, sizeof(buf), "\x1b[%d;%dH", (E.cursor_y - E.row_offset) + 1, (E.rx - E.col_offset) + 1); - abAppend(&ab, buf, strlen(buf)); + abAppend(&ab, buf, len); abAppend(&ab, SHOW_CURSOR, 6); diff --git a/src/row_op.c b/src/row_op.c index 1650f68..401e3b5 100644 --- a/src/row_op.c +++ b/src/row_op.c @@ -6,213 +6,65 @@ #include #include -extern struct editorConfig E; +#include "include/utf8.h" -static int is_tab(utf_8_char_t *ch) { - return ch->len == 1 && ch->c[0] == '\t'; -} - -// Helper function to check if two utf_8_char_t are equal -static int utf8_char_equal(utf_8_char_t *a, utf_8_char_t *b) { - if (a->len != b->len) return 0; - return memcmp(a->c, b->c, a->len) == 0; -} - -// Helper function to create a space character -static utf_8_char_t make_space() { - utf_8_char_t space; - space.c[0] = ' '; - space.len = 1; - return space; -} - -int editorRowCxToRx(erow *row, int cursor_x) { - int render_x = 0; - int i; - for (i = 0; i < cursor_x; ++i) { - if (is_tab(&row->chars[i])) { - render_x += (E.constantes.TAB_LENGTH - 1) - (render_x % E.constantes.TAB_LENGTH); - } - render_x++; - } - return render_x; -} - -int editorRowRxToCx(erow *row, int rx) { - int cur_rx = 0; - int cx; - for (cx = 0; cx < row->size; cx++) { - if (is_tab(&row->chars[cx])) - cur_rx += (E.constantes.TAB_LENGTH - 1) - (cur_rx % E.constantes.TAB_LENGTH); - cur_rx++; - if (cur_rx > rx) return cx; - } - return cx; -} - -/** - * \fn editorUpdateRow(erow *row) - * \brief Copy content of \p row in \p row->render. - * */ - -void editorUpdateRow(erow *row) { - int i, i_render; - int tabs = 0; - - // Count number of tabs - for (i = 0; i < row->size; ++i) { - if (is_tab(&row->chars[i])) { - tabs++; - } - } - - free(row->render); - // Allocate space for utf_8_char_t array - row->render = malloc(sizeof(utf_8_char_t) * (row->size + tabs * (E.constantes.TAB_LENGTH - 1))); - - if (!row->render) { - row->rsize = 0; - return; - } - - i_render = 0; - for (i = 0; i < row->size; ++i) { - if (is_tab(&row->chars[i])) { - // Replace tab with spaces - row->render[i_render++] = make_space(); - while (i_render % E.constantes.TAB_LENGTH) { - row->render[i_render++] = make_space(); - } - } else { - row->render[i_render++] = row->chars[i]; - } - } - row->rsize = i_render; -} - -void editorInsertRow(int at, char *s, size_t len) { +void editorInsertRow(int at, char *s, int len) { if (at < 0 || at > E.numrows) { return; } - erow *tmp = (erow *)realloc(E.row, sizeof(erow) * (E.numrows + 1)); + row_t *tmp = (row_t *)realloc(E.rows, sizeof(row_t) * (E.numrows + 1)); if (!tmp) { return; } - E.row = tmp; - memmove(&E.row[at + 1], &E.row[at], sizeof(erow) * (E.numrows - at)); + E.rows = tmp; + memmove(&E.rows[at + 1], &E.rows[at], sizeof(row_t) * (E.numrows - at)); // Initialize the new row - E.row[at].size = 0; - E.row[at].chars = NULL; - E.row[at].rsize = 0; - E.row[at].render = NULL; + E.rows[at].size = len; + E.rows[at].chars = NULL; - // Count UTF-8 characters first - int char_count = 0; - int i = 0; - while (i < len) { - unsigned char first = (unsigned char)s[i]; - int char_len; - - if ((first & 0x80) == 0) { - char_len = 1; - } else if ((first & 0xE0) == 0xC0) { - char_len = 2; - } else if ((first & 0xF0) == 0xE0) { - char_len = 3; - } else if ((first & 0xF8) == 0xF0) { - char_len = 4; - } else { - char_len = 1; // Invalid, treat as single byte - } - - i += char_len; - char_count++; - } + E.rows[at].cap = len + 1; + E.rows[at].chars = malloc(E.rows[at].cap); - // Allocate for the actual number of characters - if (char_count > 0) { - E.row[at].chars = malloc(sizeof(utf_8_char_t) * char_count); - if (!E.row[at].chars) { - return; - } - } - - // Now convert to utf_8_char_t array - i = 0; - E.row[at].size = 0; - while (i < len && E.row[at].size < char_count) { - utf_8_char_t ch; - - unsigned char first = (unsigned char)s[i]; - if ((first & 0x80) == 0) { - ch.len = 1; - } else if ((first & 0xE0) == 0xC0) { - ch.len = 2; - } else if ((first & 0xF0) == 0xE0) { - ch.len = 3; - } else if ((first & 0xF8) == 0xF0) { - ch.len = 4; - } else { - ch.len = 1; - } - - // Copy bytes - for (int j = 0; j < ch.len && i < len; j++) { - ch.c[j] = s[i++]; - } - - E.row[at].chars[E.row[at].size++] = ch; - } + memcpy(E.rows[at].chars, s, len); + E.rows[at].chars[len] = '\n'; - editorUpdateRow(&E.row[at]); - - ++E.numrows; ++E.dirty; } -void editorFreeRow(erow *row) { - free(row->render); +void editorFreeRow(row_t *row) { free(row->chars); } -void editorDelRow(int at) { - if (at < 0 || at >= E.numrows) { - return; +int editorRowCxToByte(const row_t *row, int cursor_x) { + int i = 0, col = 0; + while (col < cursor_x && i < row->size) { + int sl = utf8Seqlen((unsigned char)row->chars[i]); + if (sl < 1) sl = 1; + col++; + i += sl; } - editorFreeRow(&E.row[at]); - memmove(&E.row[at], &E.row[at + 1], sizeof(erow) * (E.numrows - at - 1)); - --E.numrows; - ++E.dirty; + return i; } /** * \fn editorRowInsertChar(erow *row, int at, int c) * \param at Index of where we want to insert the char */ -void editorRowInsertChar(erow *row, int at, utf_8_char_t c) { +void editorRowInsertBytes(row_t *row, int at, const char *src, int n) { if (E.state == READ_ONLY) return; - if (at < 0 || at > row->size) { - at = row->size; + if (row->size + n + 1 > row->cap) { + row->cap = (row->size + n + 1) * 2; + row->chars = realloc(row->chars, row->cap); } - row->chars = realloc(row->chars, row->size + 1); - memmove(&row->chars[at + 1], &row->chars[at], row->size - at + 1); - ++(row->size); - row->chars[at] = c; - fprintf(stderr, "Row insert : %s %d\n", c.c, c.len); - editorUpdateRow(row); - ++E.dirty; -} - -void editorRowAppendString(erow *row, char *s, size_t len) { - row->chars = realloc(row->chars, row->size + len + 1); - memcpy(&row->chars[row->size], s, len); - row->size += len; - editorUpdateRow(row); + memmove(row->chars + at + n, row->chars + at, row->size - at); + memcpy(row->chars + at, src, n); + row->size += n; + row->chars[row->size] = '\0'; ++E.dirty; } @@ -221,12 +73,19 @@ void editorRowAppendString(erow *row, char *s, size_t len) { * \brief Delete the a char at the chosen position on the given row * \param at Index of the char to delete * \param row Row on operation is made */ -void editorRowDelchar(erow *row, int at) { - if (at < 0 || at >= row->size) { - return; - } - memmove(&row->chars[at], &row->chars[at + 1], row->size - at); - --row->size; - editorUpdateRow(row); - ++E.dirty; +void editorRowDelByte(row_t *row, int at, int n) { + memmove(row->chars + at, row->chars + at + n, row->size - at - n); + row->size -= n; + row->chars[row->size] = '\0'; } + +int editorRowCharCount(row_t *row) +{ + int n = 0, i = 0; + while (i < row->size) { + int sl = utf8Seqlen((unsigned char)row->chars[i]); + if (sl < 1) sl = 1; + n++; i += sl; + } + return n; +} \ No newline at end of file diff --git a/src/terminal.c b/src/terminal.c index 29626ba..15899dc 100644 --- a/src/terminal.c +++ b/src/terminal.c @@ -1,10 +1,16 @@ #include "../include/terminal.h" + +#include + #include "../include/data.h" +#include "../include/define.h" #include #include #include +#include "include/utf8.h" + void die(const char *s) { write(STDOUT_FILENO, "\x1b[2J", 4); write(STDOUT_FILENO, CURSOR_TOP_LEFT, 3); @@ -37,211 +43,97 @@ void enableRawMode() { } } -int utf8_char_length(unsigned char first_byte) { - if ((first_byte & 0x80) == 0) - return 1; // 0xxxxxxx - ASCII - if ((first_byte & 0xE0) == 0xC0) - return 2; // 110xxxxx - 2 bytes - if ((first_byte & 0xF0) == 0xE0) - return 3; // 1110xxxx - 3 bytes - if ((first_byte & 0xF8) == 0xF0) - return 4; // 11110xxx - 4 bytes - return 1; // Invalid, treat as single byte -} +#include /* isprint */ -// Convert UTF-8 to Unicode code point -unsigned int utf8_to_codepoint(const unsigned char *bytes, int len) { - if (len == 1) - return bytes[0]; - if (len == 2) - return ((bytes[0] & 0x1F) << 6) | (bytes[1] & 0x3F); - if (len == 3) - return ((bytes[0] & 0x0F) << 12) | ((bytes[1] & 0x3F) << 6) | - (bytes[2] & 0x3F); - if (len == 4) - return ((bytes[0] & 0x07) << 18) | ((bytes[1] & 0x3F) << 12) | - ((bytes[2] & 0x3F) << 6) | (bytes[3] & 0x3F); - return 0; -} +char *key_to_string(int key) { + static char key_str[32]; -void parse_key(unsigned char *seq, int len, KeyInfo *key) { - memcpy(key->c.c, seq, len); - key->c.len = len; - key->modifiers = MOD_NONE; - key->type = KEY_UNKNOWN; - - // Control characters (Ctrl+A to Ctrl+Z) - if (len == 1 && seq[0] < 32 && seq[0] != 27 && seq[0] != 9 && seq[0] != 10 && - seq[0] != 13) { - key->type = KEY_CTRL; - key->data.ctrl_char = seq[0] + 64; - return; - } - - // Special single characters - if (len == 1) { - switch (seq[0]) { - case 9: - case 10: - case 13: - case 27: - case 127: - key->type = KEY_SPECIAL; - key->data.special = seq[0]; - return; + if (key == '\r') { + strcpy(key_str, "ENTER"); + } else if (key >= 1 && key <= 26) { + snprintf(key_str, sizeof(key_str), "CTRL-%c", 'a' + key - 1); + } else { + switch (key) { + case ARROW_UP: strcpy(key_str, "ARROW-UP"); break; + case ARROW_DOWN: strcpy(key_str, "ARROW-DOWN"); break; + case ARROW_LEFT: strcpy(key_str, "ARROW-LEFT"); break; + case ARROW_RIGHT: strcpy(key_str, "ARROW-RIGHT"); break; + case PAGE_UP: strcpy(key_str, "PAGE-UP"); break; + case PAGE_DOWN: strcpy(key_str, "PAGE-DOWN"); break; + case DEL_KEY: strcpy(key_str, "DEL"); break; + case BACKSPACE: strcpy(key_str, "BACKSPACE"); break; + case BEG_LINE: strcpy(key_str, "HOME"); break; + case END_LINE: strcpy(key_str, "END"); break; + case '\x1b': strcpy(key_str, "ESCAPE"); break; + default: + if (key > 127) { + /* UTF-8 code point — re-encode into the buffer */ + char buf[5] = {0}; + int n = utf8Encode((uint32_t)key, buf); + snprintf(key_str, sizeof(key_str), "%.*s", n, buf); + } else if (isprint(key)) { + snprintf(key_str, sizeof(key_str), "%c", key); + } else { + snprintf(key_str, sizeof(key_str), "KEY-%d", key); + } } } + return key_str; +} - // Escape sequences - if (len >= 2 && seq[0] == 27) { - // Alt+key combinations - if (len == 2 && seq[1] >= 32 && seq[1] < 127) { - key->type = KEY_ALT; - key->data.alt_char = seq[1]; - return; - } +int editorReadKey() { + char c; + /* read first byte — may be start of UTF-8 or escape */ + while (read(STDIN_FILENO, &c, 1) != 1); - // CSI sequences (ESC [ ...) - if (len >= 3 && seq[1] == '[') { - // Arrow keys - if (len == 3) { - switch (seq[2]) { - case 'A': - case 'B': - case 'C': - case 'D': - key->type = KEY_ARROW; - key->data.arrow = seq[2]; - return; - case 'H': - case 'F': - key->type = KEY_NAVIGATION; - key->data.special = seq[2]; - return; - } - } - - // Modified keys (ESC [ 1 ; modifier letter) - if (len >= 6 && seq[2] == '1' && seq[3] == ';') { - int modifier = seq[4] - '0'; - char k = seq[5]; - - if (modifier & 1) - key->modifiers |= MOD_SHIFT; - if (modifier & 2) - key->modifiers |= MOD_ALT; - if (modifier & 4) - key->modifiers |= MOD_CTRL; - - switch (k) { - case 'A': - case 'B': - case 'C': - case 'D': - key->type = KEY_ARROW; - key->data.arrow = k; - return; - case 'H': - case 'F': - key->type = KEY_NAVIGATION; - key->data.special = k; - return; - } - } - - // Function keys and navigation - if (len == 4 && seq[3] == '~') { - int num = seq[2] - '0'; - if (num >= 1 && num <= 6) { - key->type = KEY_NAVIGATION; - key->data.special = seq[2]; - return; - } - } - - if (len == 5 && seq[4] == '~') { - int num = (seq[2] - '0') * 10 + (seq[3] - '0'); - if (num >= 15 && num <= 24) { - key->type = KEY_FUNCTION; - // Map to F5-F12 - int f_map[] = {15, 17, 18, 19, 20, 21, 23, 24}; - for (int i = 0; i < 8; i++) { - if (f_map[i] == num) { - key->data.function_num = i + 5; - return; + if (c == '\x1b') { + char seq[6]; + /* try to read escape sequence */ + if (read(STDIN_FILENO, &seq[0], 1) != 1) return '\x1b'; + if (read(STDIN_FILENO, &seq[1], 1) != 1) return '\x1b'; + if (seq[0] == '[') { + if (seq[1] >= '0' && seq[1] <= '9') { + if (read(STDIN_FILENO, &seq[2], 1) != 1) return '\x1b'; + if (seq[2] == '~') { + switch (seq[1]) { + case '1': return BEG_LINE; + case '3': return DEL_KEY; + case '4': return END_LINE; + case '5': return PAGE_UP; + case '6': return PAGE_DOWN; + case '7': return BEG_LINE; + case '8': return END_LINE; + } + } + } else { + switch (seq[1]) { + case 'A': return ARROW_UP; + case 'B': return ARROW_DOWN; + case 'C': return ARROW_RIGHT; + case 'D': return ARROW_LEFT; + case 'H': return BEG_LINE; + case 'F': return END_LINE; + } } - } } - } + return '\x1b'; } - // SS3 sequences (ESC O ...) - if (len == 3 && seq[1] == 'O') { - switch (seq[2]) { - case 'P': - case 'Q': - case 'R': - case 'S': - key->type = KEY_FUNCTION; - key->data.function_num = seq[2] - 'P' + 1; - return; - case 'H': - case 'F': - key->type = KEY_NAVIGATION; - key->data.special = seq[2]; - return; - } + /* multi-byte UTF-8: read remaining bytes */ + int seqlen = utf8Seqlen((unsigned char)c); + if (seqlen > 1) { + /* pack into a pseudo-codepoint just to pass bytes through; + we handle encoding/decoding at the row level */ + char buf[4] = {c, 0, 0, 0}; + for (int i = 1; i < seqlen; i++) + if (read(STDIN_FILENO, &buf[i], 1) != 1) break; + /* decode and return as uint32, but we need int — use high range */ + const char *p = buf; + uint32_t cp = utf8Decode(&p); + return (int)cp; /* caller re-encodes when inserting */ } - } - // UTF-8 character - if (seq[0] >= 32 || (seq[0] & 0x80)) { - int char_len = utf8_char_length(seq[0]); - fprintf(stderr, "char length : %d\n", char_len); - if (char_len <= len) { - key->type = KEY_CHAR; - memcpy(key->c.c, seq, len); - key->c.len = len; - return; - } - } -} - -KeyInfo *editorReadKey() { - fd_set fds; - int timeout_ms = 10; - struct timeval tv; - int total = 0; - KeyInfo *key = (KeyInfo *)malloc(sizeof(KeyInfo)); - int len; - unsigned char buffer[20]; - - if (read(STDIN_FILENO, &buffer[0], 1) <= 0) - return 0; - - while (total < 20) { - FD_ZERO(&fds); - FD_SET(STDIN_FILENO, &fds); - tv.tv_sec = 0; - tv.tv_usec = timeout_ms * 1000; - - int ret = select(STDIN_FILENO + 1, &fds, NULL, NULL, &tv); - if (ret <= 0) - break; - - if (read(STDIN_FILENO, &buffer[total], 1) <= 0) - break; - total++; - } - total++; - - parse_key(buffer, total, key); - - // DEBUG - - fprintf(stderr, "%s %d %d %s %d\n", buffer, buffer[0], buffer[1], key->c.c, key->c.len); - - return key; + return (unsigned char)c; } int getCursorPosition(int *rows, int *cols) { diff --git a/src/utf8.c b/src/utf8.c new file mode 100644 index 0000000..10db8b3 --- /dev/null +++ b/src/utf8.c @@ -0,0 +1,148 @@ +/** + * @file utf8.c + */ + +#include "../include/utf8.h" +#include "../include/data.h" + +#include +#include + + +uint32_t readUtf8Char(void) +{ + unsigned char buf[4]; + + read(STDIN_FILENO, &buf[0], 1); + + int extra; + uint32_t cp; + + if (buf[0] < 0x80) + { + cp = buf[0]; + extra = 0; + } + else if (buf[0] < 0xC0) { return 0xFFFD; } // stray continuation + else if (buf[0] < 0xE0) + { + cp = buf[0] & 0x1F; + extra = 1; + } + else if (buf[0] < 0xF0) + { + cp = buf[0] & 0x0F; + extra = 2; + } + else + { + cp = buf[0] & 0x07; + extra = 3; + } + + if (extra > 0) + { + read(STDIN_FILENO, &buf[1], extra); // read remaining bytes at once + for (int i = 0; i < extra; i++) + cp = (cp << 6) | (buf[1 + i] & 0x3F); + } + + return cp; +} + +uint32_t utf8Decode(const char** s) +{ + unsigned char c = (unsigned char)**s; + uint32_t cp; + int extra; + if (c < 0x80) + { + cp = c; + extra = 0; + } + else if (c < 0xC0) + { + (*s)++; + return 0xFFFD; + } + else if (c < 0xE0) + { + cp = c & 0x1F; + extra = 1; + } + else if (c < 0xF0) + { + cp = c & 0x0F; + extra = 2; + } + else + { + cp = c & 0x07; + extra = 3; + } + (*s)++; + while (extra--) + { + c = (unsigned char)**s; + if ((c & 0xC0) != 0x80) return 0xFFFD; + cp = (cp << 6) | (c & 0x3F); + (*s)++; + } + return cp; +} + +// buf must have at least 4 bytes; returns bytes written +int utf8Encode(uint32_t cp, char* buf) +{ + if (cp < 0x80) + { + buf[0] = cp; + return 1; + } + if (cp < 0x800) + { + buf[0] = 0xC0 | (cp >> 6); + buf[1] = 0x80 | (cp & 0x3F); + return 2; + } + if (cp < 0x10000) + { + buf[0] = 0xE0 | (cp >> 12); + buf[1] = 0x80 | ((cp >> 6) & 0x3F); + buf[2] = 0x80 | (cp & 0x3F); + return 3; + } + buf[0] = 0xF0 | (cp >> 18); + buf[1] = 0x80 | ((cp >> 12) & 0x3F); + buf[2] = 0x80 | ((cp >> 6) & 0x3F); + buf[3] = 0x80 | (cp & 0x3F); + return 4; +} + +int utf8Seqlen(unsigned char c) +{ + if (c < 0x80) return 1; + if (c < 0xC0) return 0; /* continuation — shouldn't be leading */ + if (c < 0xE0) return 2; + if (c < 0xF0) return 3; + return 4; +} + +/** + * @param codepoint utf8 codepoint of a char + * @return length of the codepoint + */ +int codepointWidth(uint32_t codepoint) +{ + if (codepoint < 0x20 || codepoint == 0x7F) return 0; + /* rough double-width ranges */ + if ((codepoint >= 0x1100 && codepoint <= 0x115F) || + (codepoint >= 0x2E80 && codepoint <= 0x303E) || + (codepoint >= 0x3041 && codepoint <= 0x33BF) || + (codepoint >= 0xAC00 && codepoint <= 0xD7AF) || + (codepoint >= 0xF900 && codepoint <= 0xFAFF) || + (codepoint >= 0xFF01 && codepoint <= 0xFF60) || + (codepoint >= 0x1F300 && codepoint <= 0x1FAFF)) + return 2; + return 1; +}