add utf8_char_t struct

This commit is contained in:
Arthur Barraux
2025-11-19 10:37:41 +01:00
parent c06c820dfb
commit eae85c32ca
18 changed files with 770 additions and 339 deletions
+197 -57
View File
@@ -2,6 +2,8 @@
#include "../include/data.h"
#include <stdio.h>
#include <unistd.h>
#include <string.h>
void die(const char *s) {
write(STDOUT_FILENO, "\x1b[2J", 4);
@@ -35,73 +37,211 @@ void enableRawMode() {
}
}
int editorReadKey() {
int nread;
char c;
char seq[3];
while ((nread = read(STDIN_FILENO, &c, 1)) != 1) {
if (nread == -1 && errno != EAGAIN) {
die("read");
/*
 * Return the number of bytes in the UTF-8 sequence introduced by first_byte.
 *
 * Lead bytes are classified by value range:
 *   0x00-0x7F -> 1 (ASCII)
 *   0xC0-0xDF -> 2
 *   0xE0-0xEF -> 3
 *   0xF0-0xF7 -> 4
 * Anything else (a continuation byte 0x80-0xBF, or 0xF8-0xFF) is not a valid
 * lead byte and is reported as a single byte.
 */
int utf8_char_length(unsigned char first_byte) {
  if (first_byte < 0x80) {
    return 1; // plain ASCII
  }
  if (first_byte < 0xC0) {
    return 1; // stray continuation byte: treat as one byte
  }
  if (first_byte < 0xE0) {
    return 2;
  }
  if (first_byte < 0xF0) {
    return 3;
  }
  if (first_byte < 0xF8) {
    return 4;
  }
  return 1; // 0xF8 and above never start a sequence: treat as one byte
}
/*
 * Decode a UTF-8 sequence of len bytes into its Unicode code point.
 *
 * bytes - the sequence; bytes[0] is the lead byte
 * len   - sequence length, 1 to 4
 *
 * A one-byte sequence is returned unchanged. For longer sequences the payload
 * bits of the lead byte are followed by the low six bits of each subsequent
 * byte. The bytes are not validated. Any other len yields 0 without reading
 * bytes.
 */
unsigned int utf8_to_codepoint(const unsigned char *bytes, int len) {
  // Payload mask of the lead byte, indexed by sequence length.
  static const unsigned char lead_mask[5] = {0x00, 0xFF, 0x1F, 0x0F, 0x07};

  if (len < 1 || len > 4) {
    return 0;
  }

  unsigned int codepoint = bytes[0] & lead_mask[len];
  for (int i = 1; i < len; i++) {
    // Each following byte contributes its low six bits.
    codepoint = (codepoint << 6) | (bytes[i] & 0x3Fu);
  }
  return codepoint;
}
/*
 * Classify one raw terminal input sequence and describe it in *key.
 *
 * seq - raw bytes as read from the terminal
 * len - number of valid bytes in seq
 * key - output record; c, modifiers, type and the matching data member are
 *       written
 *
 * The raw bytes are copied into key->c. key->type is left as KEY_UNKNOWN when
 * the sequence matches none of the recognised forms.
 */
void parse_key(unsigned char *seq, int len, KeyInfo *key) {
  key->modifiers = MOD_NONE;
  key->type = KEY_UNKNOWN;

  // Nothing to classify; also keeps seq[0] from being read below.
  if (len <= 0) {
    key->c.len = 0;
    return;
  }

  // NOTE(review): len can be larger than a single UTF-8 character (escape
  // sequences are up to the reader's buffer size). The capacity of key->c.c
  // is declared outside this file -- confirm it can hold len bytes.
  memcpy(key->c.c, seq, len);
  key->c.len = len;

  if (len == 1) {
    switch (seq[0]) {
    case 9:
    case 10:
    case 13:
    case 27:
    case 127:
      // TAB, LF, CR, ESC and DEL are reported by their byte value.
      key->type = KEY_SPECIAL;
      key->data.special = seq[0];
      return;
    default:
      break;
    }
    if (seq[0] < 32) {
      // Remaining control bytes: adding 64 gives the key pressed with Ctrl
      // (1 -> 'A', 26 -> 'Z').
      key->type = KEY_CTRL;
      key->data.ctrl_char = seq[0] + 64;
      return;
    }
  }

  // Escape sequences
  if (len >= 2 && seq[0] == 27) {
    // Alt+key: ESC followed by one printable ASCII byte
    if (len == 2 && seq[1] >= 32 && seq[1] < 127) {
      key->type = KEY_ALT;
      key->data.alt_char = seq[1];
      return;
    }

    // CSI sequences (ESC [ ...)
    if (len >= 3 && seq[1] == '[') {
      // Unmodified arrows and Home/End: ESC [ <final>
      if (len == 3) {
        switch (seq[2]) {
        case 'A':
        case 'B':
        case 'C':
        case 'D':
          key->type = KEY_ARROW;
          key->data.arrow = seq[2];
          return;
        case 'H':
        case 'F':
          key->type = KEY_NAVIGATION;
          key->data.special = seq[2];
          return;
        default:
          break;
        }
      }

      // Modified keys: ESC [ 1 ; <m> <final>
      if (len >= 6 && seq[2] == '1' && seq[3] == ';') {
        // xterm sends m = 1 + bitmask (1 = Shift, 2 = Alt, 4 = Ctrl), so the
        // bitmask is the digit minus '1', not minus '0'.
        int modifier = 0;
        if (seq[4] >= '1' && seq[4] <= '9') {
          modifier = seq[4] - '1';
        }
        char k = seq[5];

        if (modifier & 1)
          key->modifiers |= MOD_SHIFT;
        if (modifier & 2)
          key->modifiers |= MOD_ALT;
        if (modifier & 4)
          key->modifiers |= MOD_CTRL;

        switch (k) {
        case 'A':
        case 'B':
        case 'C':
        case 'D':
          key->type = KEY_ARROW;
          key->data.arrow = k;
          return;
        case 'H':
        case 'F':
          key->type = KEY_NAVIGATION;
          key->data.special = k;
          return;
        default:
          break;
        }
      }

      // Navigation keys: ESC [ <1-6> ~ ; the digit character itself is stored
      if (len == 4 && seq[3] == '~') {
        int num = seq[2] - '0';
        if (num >= 1 && num <= 6) {
          key->type = KEY_NAVIGATION;
          key->data.special = seq[2];
          return;
        }
      }

      // Function keys F5-F12: ESC [ <nn> ~
      if (len == 5 && seq[4] == '~' && seq[2] >= '0' && seq[2] <= '9' &&
          seq[3] >= '0' && seq[3] <= '9') {
        // Codes for F5..F12 in order; 16 and 22 are not used.
        static const int f_map[] = {15, 17, 18, 19, 20, 21, 23, 24};
        const int f_count = (int)(sizeof f_map / sizeof f_map[0]);
        int num = (seq[2] - '0') * 10 + (seq[3] - '0');

        for (int i = 0; i < f_count; i++) {
          if (f_map[i] == num) {
            // Set the type only on a match so an unmapped code stays
            // KEY_UNKNOWN instead of KEY_FUNCTION with no number.
            key->type = KEY_FUNCTION;
            key->data.function_num = i + 5;
            return;
          }
        }
      }
    }

    // SS3 sequences (ESC O ...)
    if (len == 3 && seq[1] == 'O') {
      switch (seq[2]) {
      case 'P':
      case 'Q':
      case 'R':
      case 'S':
        // F1-F4
        key->type = KEY_FUNCTION;
        key->data.function_num = seq[2] - 'P' + 1;
        return;
      case 'H':
      case 'F':
        key->type = KEY_NAVIGATION;
        key->data.special = seq[2];
        return;
      default:
        break;
      }
    }
  }

  // Printable ASCII or UTF-8 text. seq is unsigned, so bytes with the high
  // bit set also satisfy >= 32. key->c already holds the bytes.
  if (seq[0] >= 32) {
    int char_len = utf8_char_length(seq[0]);
    // Accept only when the whole character announced by the lead byte arrived.
    if (char_len <= len) {
      key->type = KEY_CHAR;
      return;
    }
  }
}
/*
 * Read one key press from standard input and decode it with parse_key().
 *
 * The first byte is obtained with a plain read(). Further bytes are appended
 * for as long as select() reports more input within a short timeout, so the
 * bytes of an escape sequence or a multi-byte UTF-8 character are handed to
 * parse_key() together.
 *
 * Returns a malloc()ed KeyInfo that the caller must free(), or NULL when the
 * first read yields no byte or the allocation fails.
 */
KeyInfo *editorReadKey() {
  enum { READ_BUF_SIZE = 20, FOLLOWUP_TIMEOUT_MS = 10 };
  unsigned char buffer[READ_BUF_SIZE];
  int total = 0;

  if (read(STDIN_FILENO, &buffer[0], 1) <= 0)
    return NULL;
  // buffer[0] is now occupied; follow-up bytes go after it.
  total = 1;

  while (total < READ_BUF_SIZE) {
    fd_set fds;
    struct timeval tv;

    // select() may modify both arguments, so rebuild them every round.
    FD_ZERO(&fds);
    FD_SET(STDIN_FILENO, &fds);
    tv.tv_sec = 0;
    tv.tv_usec = FOLLOWUP_TIMEOUT_MS * 1000;

    // Timeout or error: the sequence is complete.
    if (select(STDIN_FILENO + 1, &fds, NULL, NULL, &tv) <= 0)
      break;
    if (read(STDIN_FILENO, &buffer[total], 1) <= 0)
      break;
    total++;
  }

  // Allocate only once there is something to return, so the early exit above
  // has nothing to release.
  KeyInfo *key = malloc(sizeof *key);
  if (key == NULL)
    return NULL;

  parse_key(buffer, total, key);
  return key;
}
int getCursorPosition(int *rows, int *cols) {