utf8 processing without struct

This commit is contained in:
2026-05-03 23:32:40 +02:00
parent eae85c32ca
commit 8e1b4d2f86
23 changed files with 637 additions and 906 deletions
+85 -193
View File
@@ -1,10 +1,16 @@
#include "../include/terminal.h"
#include <ctype.h>
#include "../include/data.h"
#include "../include/define.h"
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include "include/utf8.h"
void die(const char *s) {
write(STDOUT_FILENO, "\x1b[2J", 4);
write(STDOUT_FILENO, CURSOR_TOP_LEFT, 3);
@@ -37,211 +43,97 @@ void enableRawMode() {
}
}
int utf8_char_length(unsigned char first_byte) {
if ((first_byte & 0x80) == 0)
return 1; // 0xxxxxxx - ASCII
if ((first_byte & 0xE0) == 0xC0)
return 2; // 110xxxxx - 2 bytes
if ((first_byte & 0xF0) == 0xE0)
return 3; // 1110xxxx - 3 bytes
if ((first_byte & 0xF8) == 0xF0)
return 4; // 11110xxx - 4 bytes
return 1; // Invalid, treat as single byte
}
#include <ctype.h> /* isprint */
// Convert UTF-8 to Unicode code point
unsigned int utf8_to_codepoint(const unsigned char *bytes, int len) {
if (len == 1)
return bytes[0];
if (len == 2)
return ((bytes[0] & 0x1F) << 6) | (bytes[1] & 0x3F);
if (len == 3)
return ((bytes[0] & 0x0F) << 12) | ((bytes[1] & 0x3F) << 6) |
(bytes[2] & 0x3F);
if (len == 4)
return ((bytes[0] & 0x07) << 18) | ((bytes[1] & 0x3F) << 12) |
((bytes[2] & 0x3F) << 6) | (bytes[3] & 0x3F);
return 0;
}
char *key_to_string(int key) {
static char key_str[32];
void parse_key(unsigned char *seq, int len, KeyInfo *key) {
memcpy(key->c.c, seq, len);
key->c.len = len;
key->modifiers = MOD_NONE;
key->type = KEY_UNKNOWN;
// Control characters (Ctrl+A to Ctrl+Z)
if (len == 1 && seq[0] < 32 && seq[0] != 27 && seq[0] != 9 && seq[0] != 10 &&
seq[0] != 13) {
key->type = KEY_CTRL;
key->data.ctrl_char = seq[0] + 64;
return;
}
// Special single characters
if (len == 1) {
switch (seq[0]) {
case 9:
case 10:
case 13:
case 27:
case 127:
key->type = KEY_SPECIAL;
key->data.special = seq[0];
return;
if (key == '\r') {
strcpy(key_str, "ENTER");
} else if (key >= 1 && key <= 26) {
snprintf(key_str, sizeof(key_str), "CTRL-%c", 'a' + key - 1);
} else {
switch (key) {
case ARROW_UP: strcpy(key_str, "ARROW-UP"); break;
case ARROW_DOWN: strcpy(key_str, "ARROW-DOWN"); break;
case ARROW_LEFT: strcpy(key_str, "ARROW-LEFT"); break;
case ARROW_RIGHT: strcpy(key_str, "ARROW-RIGHT"); break;
case PAGE_UP: strcpy(key_str, "PAGE-UP"); break;
case PAGE_DOWN: strcpy(key_str, "PAGE-DOWN"); break;
case DEL_KEY: strcpy(key_str, "DEL"); break;
case BACKSPACE: strcpy(key_str, "BACKSPACE"); break;
case BEG_LINE: strcpy(key_str, "HOME"); break;
case END_LINE: strcpy(key_str, "END"); break;
case '\x1b': strcpy(key_str, "ESCAPE"); break;
default:
if (key > 127) {
/* UTF-8 code point — re-encode into the buffer */
char buf[5] = {0};
int n = utf8Encode((uint32_t)key, buf);
snprintf(key_str, sizeof(key_str), "%.*s", n, buf);
} else if (isprint(key)) {
snprintf(key_str, sizeof(key_str), "%c", key);
} else {
snprintf(key_str, sizeof(key_str), "KEY-%d", key);
}
}
}
return key_str;
}
// Escape sequences
if (len >= 2 && seq[0] == 27) {
// Alt+key combinations
if (len == 2 && seq[1] >= 32 && seq[1] < 127) {
key->type = KEY_ALT;
key->data.alt_char = seq[1];
return;
}
int editorReadKey() {
char c;
/* read first byte — may be start of UTF-8 or escape */
while (read(STDIN_FILENO, &c, 1) != 1);
// CSI sequences (ESC [ ...)
if (len >= 3 && seq[1] == '[') {
// Arrow keys
if (len == 3) {
switch (seq[2]) {
case 'A':
case 'B':
case 'C':
case 'D':
key->type = KEY_ARROW;
key->data.arrow = seq[2];
return;
case 'H':
case 'F':
key->type = KEY_NAVIGATION;
key->data.special = seq[2];
return;
}
}
// Modified keys (ESC [ 1 ; modifier letter)
if (len >= 6 && seq[2] == '1' && seq[3] == ';') {
int modifier = seq[4] - '0';
char k = seq[5];
if (modifier & 1)
key->modifiers |= MOD_SHIFT;
if (modifier & 2)
key->modifiers |= MOD_ALT;
if (modifier & 4)
key->modifiers |= MOD_CTRL;
switch (k) {
case 'A':
case 'B':
case 'C':
case 'D':
key->type = KEY_ARROW;
key->data.arrow = k;
return;
case 'H':
case 'F':
key->type = KEY_NAVIGATION;
key->data.special = k;
return;
}
}
// Function keys and navigation
if (len == 4 && seq[3] == '~') {
int num = seq[2] - '0';
if (num >= 1 && num <= 6) {
key->type = KEY_NAVIGATION;
key->data.special = seq[2];
return;
}
}
if (len == 5 && seq[4] == '~') {
int num = (seq[2] - '0') * 10 + (seq[3] - '0');
if (num >= 15 && num <= 24) {
key->type = KEY_FUNCTION;
// Map to F5-F12
int f_map[] = {15, 17, 18, 19, 20, 21, 23, 24};
for (int i = 0; i < 8; i++) {
if (f_map[i] == num) {
key->data.function_num = i + 5;
return;
if (c == '\x1b') {
char seq[6];
/* try to read escape sequence */
if (read(STDIN_FILENO, &seq[0], 1) != 1) return '\x1b';
if (read(STDIN_FILENO, &seq[1], 1) != 1) return '\x1b';
if (seq[0] == '[') {
if (seq[1] >= '0' && seq[1] <= '9') {
if (read(STDIN_FILENO, &seq[2], 1) != 1) return '\x1b';
if (seq[2] == '~') {
switch (seq[1]) {
case '1': return BEG_LINE;
case '3': return DEL_KEY;
case '4': return END_LINE;
case '5': return PAGE_UP;
case '6': return PAGE_DOWN;
case '7': return BEG_LINE;
case '8': return END_LINE;
}
}
} else {
switch (seq[1]) {
case 'A': return ARROW_UP;
case 'B': return ARROW_DOWN;
case 'C': return ARROW_RIGHT;
case 'D': return ARROW_LEFT;
case 'H': return BEG_LINE;
case 'F': return END_LINE;
}
}
}
}
}
return '\x1b';
}
// SS3 sequences (ESC O ...)
if (len == 3 && seq[1] == 'O') {
switch (seq[2]) {
case 'P':
case 'Q':
case 'R':
case 'S':
key->type = KEY_FUNCTION;
key->data.function_num = seq[2] - 'P' + 1;
return;
case 'H':
case 'F':
key->type = KEY_NAVIGATION;
key->data.special = seq[2];
return;
}
/* multi-byte UTF-8: read remaining bytes */
int seqlen = utf8Seqlen((unsigned char)c);
if (seqlen > 1) {
/* pack into a pseudo-codepoint just to pass bytes through;
we handle encoding/decoding at the row level */
char buf[4] = {c, 0, 0, 0};
for (int i = 1; i < seqlen; i++)
if (read(STDIN_FILENO, &buf[i], 1) != 1) break;
/* decode and return as uint32, but we need int — use high range */
const char *p = buf;
uint32_t cp = utf8Decode(&p);
return (int)cp; /* caller re-encodes when inserting */
}
}
// UTF-8 character
if (seq[0] >= 32 || (seq[0] & 0x80)) {
int char_len = utf8_char_length(seq[0]);
fprintf(stderr, "char length : %d\n", char_len);
if (char_len <= len) {
key->type = KEY_CHAR;
memcpy(key->c.c, seq, len);
key->c.len = len;
return;
}
}
}
KeyInfo *editorReadKey() {
fd_set fds;
int timeout_ms = 10;
struct timeval tv;
int total = 0;
KeyInfo *key = (KeyInfo *)malloc(sizeof(KeyInfo));
int len;
unsigned char buffer[20];
if (read(STDIN_FILENO, &buffer[0], 1) <= 0)
return 0;
while (total < 20) {
FD_ZERO(&fds);
FD_SET(STDIN_FILENO, &fds);
tv.tv_sec = 0;
tv.tv_usec = timeout_ms * 1000;
int ret = select(STDIN_FILENO + 1, &fds, NULL, NULL, &tv);
if (ret <= 0)
break;
if (read(STDIN_FILENO, &buffer[total], 1) <= 0)
break;
total++;
}
total++;
parse_key(buffer, total, key);
// DEBUG
fprintf(stderr, "%s %d %d %s %d\n", buffer, buffer[0], buffer[1], key->c.c, key->c.len);
return key;
return (unsigned char)c;
}
int getCursorPosition(int *rows, int *cols) {