utf8 processing without struct

This commit is contained in:
2026-05-03 23:32:40 +02:00
parent eae85c32ca
commit 8e1b4d2f86
23 changed files with 637 additions and 906 deletions
+43 -184
View File
@@ -6,213 +6,65 @@
#include <string.h>
#include <time.h>
extern struct editorConfig E;
#include "include/utf8.h"
/* Return 1 when ch is a single-byte sequence whose byte is a horizontal tab, 0 otherwise. */
static int is_tab(utf_8_char_t *ch) {
    if (ch->len != 1) {
        return 0;
    }
    return ch->c[0] == '\t';
}
// Two utf_8_char_t values are equal when their lengths match and the
// first `len` bytes of their `c` arrays are identical. Returns 1 or 0.
static int utf8_char_equal(utf_8_char_t *a, utf_8_char_t *b) {
    int same_length = (a->len == b->len);
    return same_length && memcmp(a->c, b->c, a->len) == 0;
}
// Build a utf_8_char_t holding a single ASCII space (len == 1).
// The whole value is zero-initialised first so the bytes of `c` past the
// space are 0 rather than indeterminate; other code in this file passes
// `c` to a "%s" format, which reads past `len`.
static utf_8_char_t make_space(void) {
    utf_8_char_t space = {0};
    space.c[0] = ' ';
    space.len = 1;
    return space;
}
/**
 * \fn editorRowCxToRx(erow *row, int cursor_x)
 * \brief Convert an index into \p row->chars to a render column.
 *
 * Walks the first \p cursor_x entries of \p row->chars; a tab advances the
 * column to the next multiple of E.constantes.TAB_LENGTH, any other entry
 * advances it by one.
 * \return The resulting render column.
 */
int editorRowCxToRx(erow *row, int cursor_x) {
    int render_x = 0;
    int idx = 0;
    while (idx < cursor_x) {
        if (is_tab(&row->chars[idx])) {
            /* Jump to the next tab stop. */
            render_x += E.constantes.TAB_LENGTH - (render_x % E.constantes.TAB_LENGTH);
        } else {
            render_x += 1;
        }
        ++idx;
    }
    return render_x;
}
/**
 * \fn editorRowRxToCx(erow *row, int rx)
 * \brief Convert a render column back to an index into \p row->chars.
 *
 * Accumulates the render width entry by entry (tabs advance to the next
 * multiple of E.constantes.TAB_LENGTH) and stops at the first entry whose
 * end passes \p rx.
 * \return Index of that entry, or \p row->size when \p rx lies beyond the row.
 */
int editorRowRxToCx(erow *row, int rx) {
    int rendered = 0;
    int cx = 0;
    while (cx < row->size) {
        if (is_tab(&row->chars[cx])) {
            rendered += (E.constantes.TAB_LENGTH - 1) - (rendered % E.constantes.TAB_LENGTH);
        }
        rendered += 1;
        if (rendered > rx) {
            break;
        }
        cx++;
    }
    return cx;
}
/**
 * \fn editorUpdateRow(erow *row)
 * \brief Rebuild \p row->render from \p row->chars.
 *
 * Every tab is replaced by spaces up to the next multiple of
 * E.constantes.TAB_LENGTH; every other entry is copied unchanged.
 * The previous render buffer is freed first. When the new allocation
 * fails, \p row->rsize is set to 0 and the function returns.
 * */
void editorUpdateRow(erow *row) {
    /* First pass: count the tabs so the buffer can be sized for the worst case. */
    int tab_count = 0;
    for (int src = 0; src < row->size; src++) {
        if (is_tab(&row->chars[src])) {
            tab_count += 1;
        }
    }

    free(row->render);
    row->render = malloc(sizeof(utf_8_char_t) * (row->size + tab_count * (E.constantes.TAB_LENGTH - 1)));
    if (row->render == NULL) {
        row->rsize = 0;
        return;
    }

    /* Second pass: copy entries, expanding tabs into spaces. */
    int dst = 0;
    for (int src = 0; src < row->size; src++) {
        if (!is_tab(&row->chars[src])) {
            row->render[dst++] = row->chars[src];
            continue;
        }
        /* At least one space, then pad to the tab stop. */
        do {
            row->render[dst++] = make_space();
        } while (dst % E.constantes.TAB_LENGTH);
    }
    row->rsize = dst;
}
// NOTE(review): this span shows two versions of editorInsertRow interleaved
// line by line: one works on E.row / erow / utf_8_char_t, the other on
// E.rows / row_t / raw bytes. It is documented as it appears and is not a
// single compilable definition.
// Both versions reject `at` outside [0, E.numrows], grow the row array by one
// element with realloc (returning without changes when that fails) and shift
// the rows from index `at` onwards up by one slot before filling slot `at`.
void editorInsertRow(int at, char *s, size_t len) {
void editorInsertRow(int at, char *s, int len) {
if (at < 0 || at > E.numrows) {
return;
}
erow *tmp = (erow *)realloc(E.row, sizeof(erow) * (E.numrows + 1));
row_t *tmp = (row_t *)realloc(E.rows, sizeof(row_t) * (E.numrows + 1));
if (!tmp) {
return;
}
E.row = tmp;
memmove(&E.row[at + 1], &E.row[at], sizeof(erow) * (E.numrows - at));
E.rows = tmp;
memmove(&E.rows[at + 1], &E.rows[at], sizeof(row_t) * (E.numrows - at));
// Initialize the new row
E.row[at].size = 0;
E.row[at].chars = NULL;
E.row[at].rsize = 0;
E.row[at].render = NULL;
E.rows[at].size = len;
E.rows[at].chars = NULL;
// Count UTF-8 characters first
// The sequence length is derived from the lead byte's high bits only;
// continuation bytes are not validated.
int char_count = 0;
int i = 0;
while (i < len) {
unsigned char first = (unsigned char)s[i];
int char_len;
if ((first & 0x80) == 0) {
char_len = 1;
} else if ((first & 0xE0) == 0xC0) {
char_len = 2;
} else if ((first & 0xF0) == 0xE0) {
char_len = 3;
} else if ((first & 0xF8) == 0xF0) {
char_len = 4;
} else {
char_len = 1; // Invalid, treat as single byte
}
i += char_len;
char_count++;
}
// Byte-buffer version: capacity is the text length plus one extra byte.
// NOTE(review): the malloc result below is not checked before the memcpy
// further down writes through it.
E.rows[at].cap = len + 1;
E.rows[at].chars = malloc(E.rows[at].cap);
// Allocate for the actual number of characters
if (char_count > 0) {
E.row[at].chars = malloc(sizeof(utf_8_char_t) * char_count);
if (!E.row[at].chars) {
return;
}
}
// Now convert to utf_8_char_t array
i = 0;
E.row[at].size = 0;
while (i < len && E.row[at].size < char_count) {
utf_8_char_t ch;
unsigned char first = (unsigned char)s[i];
if ((first & 0x80) == 0) {
ch.len = 1;
} else if ((first & 0xE0) == 0xC0) {
ch.len = 2;
} else if ((first & 0xF0) == 0xE0) {
ch.len = 3;
} else if ((first & 0xF8) == 0xF0) {
ch.len = 4;
} else {
ch.len = 1;
}
// Copy bytes
// The `i < len` bound stops a truncated trailing sequence from reading past s.
for (int j = 0; j < ch.len && i < len; j++) {
ch.c[j] = s[i++];
}
E.row[at].chars[E.row[at].size++] = ch;
}
// Byte-buffer version: copy the raw bytes and fill the extra byte.
// NOTE(review): the extra byte is set to '\n' here, whereas
// editorRowInsertBytes and editorRowDelByte store '\0' after the data;
// confirm which terminator is intended.
memcpy(E.rows[at].chars, s, len);
E.rows[at].chars[len] = '\n';
editorUpdateRow(&E.row[at]);
++E.numrows;
++E.dirty;
}
// Release the heap buffers referenced by a row.
// NOTE(review): two versions are interleaved here; the erow version frees
// row->render, and free(row->chars) follows the row_t signature.
void editorFreeRow(erow *row) {
free(row->render);
void editorFreeRow(row_t *row) {
free(row->chars);
}
// NOTE(review): this span interleaves two functions and is not a single
// compilable definition:
//  - editorDelRow(at): ignores `at` outside [0, E.numrows), frees the row,
//    closes the gap in E.row with memmove, then decrements E.numrows and
//    increments E.dirty.
//  - editorRowCxToByte(row, cursor_x): advances through row->chars one
//    sequence at a time, up to cursor_x sequences or the end of the row,
//    and returns the byte offset reached.
void editorDelRow(int at) {
if (at < 0 || at >= E.numrows) {
return;
int editorRowCxToByte(const row_t *row, int cursor_x) {
int i = 0, col = 0;
while (col < cursor_x && i < row->size) {
int sl = utf8Seqlen((unsigned char)row->chars[i]);
// A length below 1 is treated as 1 so the loop always advances.
if (sl < 1) sl = 1;
col++;
i += sl;
}
editorFreeRow(&E.row[at]);
memmove(&E.row[at], &E.row[at + 1], sizeof(erow) * (E.numrows - at - 1));
--E.numrows;
++E.dirty;
return i;
}
/**
* \fn editorRowInsertChar(erow *row, int at, utf_8_char_t c)
* \param at Index of where we want to insert the char
*
* NOTE(review): the span below interleaves three functions
* (editorRowInsertChar, editorRowInsertBytes, editorRowAppendString) and is
* not a single compilable definition. It is documented as it appears. */
void editorRowInsertChar(erow *row, int at, utf_8_char_t c) {
void editorRowInsertBytes(row_t *row, int at, const char *src, int n) {
// No insertion is performed while the editor is in READ_ONLY state.
if (E.state == READ_ONLY)
return;
// editorRowInsertChar: an out-of-range index is redirected to the end of the row.
if (at < 0 || at > row->size) {
at = row->size;
// editorRowInsertBytes: grow the buffer when size + n + 1 bytes do not fit;
// the new capacity is twice that requirement.
// NOTE(review): the realloc result overwrites row->chars directly and is
// not checked.
if (row->size + n + 1 > row->cap) {
row->cap = (row->size + n + 1) * 2;
row->chars = realloc(row->chars, row->cap);
}
// NOTE(review): this realloc asks for row->size + 1 bytes although the
// element assigned below is a utf_8_char_t; the size does not appear to be
// scaled by the element size. Confirm against the erow definition.
row->chars = realloc(row->chars, row->size + 1);
memmove(&row->chars[at + 1], &row->chars[at], row->size - at + 1);
++(row->size);
row->chars[at] = c;
// Debug trace on stderr; c.c is printed with "%s".
fprintf(stderr, "Row insert : %s %d\n", c.c, c.len);
editorUpdateRow(row);
++E.dirty;
}
void editorRowAppendString(erow *row, char *s, size_t len) {
row->chars = realloc(row->chars, row->size + len + 1);
memcpy(&row->chars[row->size], s, len);
row->size += len;
editorUpdateRow(row);
// editorRowInsertBytes: open an n-byte gap at `at`, copy src into it, then
// store a '\0' after the last data byte.
memmove(row->chars + at + n, row->chars + at, row->size - at);
memcpy(row->chars + at, src, n);
row->size += n;
row->chars[row->size] = '\0';
++E.dirty;
}
@@ -221,12 +73,19 @@ void editorRowAppendString(erow *row, char *s, size_t len) {
* \brief Delete a char at the chosen position on the given row
* \param at Index of the char to delete
* \param row Row on which the operation is made */
// NOTE(review): the closing brace of this function is not part of this span.
void editorRowDelchar(erow *row, int at) {
// Indices outside [0, row->size) are ignored.
if (at < 0 || at >= row->size) {
return;
}
// Shift the tail of the row down by one element over the deleted one.
// NOTE(review): the length passed to memmove is row->size - at and does not
// appear to be scaled by the element size. Confirm against the erow definition.
memmove(&row->chars[at], &row->chars[at + 1], row->size - at);
--row->size;
editorUpdateRow(row);
++E.dirty;
/**
 * \fn editorRowDelByte(row_t *row, int at, int n)
 * \brief Remove \p n bytes starting at byte offset \p at from \p row->chars.
 * \param row Row to modify; nothing happens when it or its buffer is NULL
 * \param at  Byte offset of the first byte to remove; must lie in [0, row->size)
 * \param n   Number of bytes to remove; values <= 0 are ignored and values
 *            reaching past the end of the row are clamped to the bytes available
 *
 * After the removal a '\0' is stored right after the remaining bytes.
 */
void editorRowDelByte(row_t *row, int at, int n) {
    if (row == NULL || row->chars == NULL) {
        return;
    }
    if (at < 0 || n <= 0 || at >= row->size) {
        return;
    }
    /* Clamp so the length handed to memmove can never become negative
     * (which would convert to a huge size_t). */
    if (n > row->size - at) {
        n = row->size - at;
    }
    memmove(row->chars + at, row->chars + at + n, (size_t)(row->size - at - n));
    row->size -= n;
    row->chars[row->size] = '\0';
}
/**
 * \fn editorRowCharCount(row_t *row)
 * \brief Count the sequences stored in the first \p row->size bytes of \p row->chars.
 *
 * Each step asks utf8Seqlen for the length associated with the current byte
 * (presumably the UTF-8 sequence length for that lead byte; confirm in
 * include/utf8.h). A result below 1 is treated as 1 so the scan always advances.
 * \return Number of steps needed to reach the end of the row.
 */
int editorRowCharCount(row_t *row)
{
    int count = 0;
    for (int pos = 0; pos < row->size; count++) {
        int step = utf8Seqlen((unsigned char)row->chars[pos]);
        pos += (step < 1) ? 1 : step;
    }
    return count;
}