Merge branch 'refs/heads/char_encode'

# Conflicts:
#	include/builtins.h
#	include/data.h
#	include/editor_op.h
#	include/file_io.h
#	include/input.h
#	include/row_op.h
#	install.sh
#	main.c
#	meson.build
#	src/builtins.c
#	src/editor_op.c
#	src/file_io.c
#	src/init.c
#	src/input.c
#	src/output.c
#	src/row_op.c
This commit is contained in:
2026-05-04 00:31:48 +02:00
16 changed files with 350 additions and 330 deletions
+13 -8
View File
@@ -8,18 +8,16 @@
#include "lisp.h"
/**
* \struct erow
* \struct row_t
* \brief Store one editor row
* \param
* */
typedef struct frow {
typedef struct row {
int size; /**< Size of the line */
int rsize; /**< Size of the render line */
int cap; /**< Size of the render line */
char *chars; /**< Characters of the line */
char *render; /**< The actual line we will print */
} frow;
} row_t;
/**
* @brief Split modes for screen layout
@@ -41,7 +39,7 @@ typedef struct {
int width; // Width of this pane
int cursor_x; // Local cursor x in this pane
int cursor_y; // Local cursor y in this pane
int rx, ry;
int rx, ry;
int row_offset; // Scroll offset for rows
int col_offset; // Scroll offset for columns
int is_active; // Is this pane currently active
@@ -113,11 +111,17 @@ struct buffer_t {
* \brief Containing our editor state.
*/
struct editorConfig {
int cursor_x, cursor_y; /**< Cursor position */
int rx; /**< Position in the render*/
int row_offset; /**< Position scroll of lines */
int col_offset; /**< Position scroll of colomns*/
int screenrows; /**< Terminal height*/
int screencols; /**< Terminal width*/
ScreenLayout layout;
int numrows; /**< Number of rows contained */
row_t *rows; /**< Store all the rows printed */
int dirty;
int prefix_state;
char status_msg[80];
@@ -158,4 +162,5 @@ struct abuf {
extern struct editorConfig E;
#endif
+14 -12
View File
@@ -8,19 +8,21 @@
#define HIDE_CURSOR "\x1b[?25l"
#define SHOW_CURSOR "\x1b[?25h"
#define ERASE_END_LINE "\x1b[K"
#define TAB "\x09"
#define SPACE "\x20"
/**
 * Key codes used for keys that are not plain characters.
 * Every value is written out so the numbering is visible at a glance.
 */
enum editorKey {
    BACKSPACE   = 127,
    ARROW_LEFT  = 1000,
    ARROW_RIGHT = 1001,
    ARROW_UP    = 1002,
    ARROW_DOWN  = 1003,
    DEL_KEY     = 1004,
    BEG_LINE    = 1005,
    END_LINE    = 1006,
    PAGE_UP     = 1007,
    PAGE_DOWN   = 1008,
};
/**
 * @brief Key codes for keys that are not plain characters.
 *
 * editorReadKey() returns decoded Unicode code points through the same int
 * value space as these constants. The synthetic codes therefore start just
 * above U+10FFFF, the largest valid code point, so that no typed character
 * can be mistaken for a navigation key (with a base of 1000, U+03E8..U+03F0
 * would have collided with ARROW_LEFT..PAGE_DOWN).
 */
enum editorKey_e {
    BACKSPACE = 127,        /* the byte sent by the backspace key */
    ARROW_LEFT = 0x110000,  /* first value outside the Unicode range */
    ARROW_RIGHT,
    ARROW_UP,
    ARROW_DOWN,
    DEL_KEY,
    BEG_LINE,
    END_LINE,
    PAGE_UP,
    PAGE_DOWN,
};
#define ABUF_INIT {NULL, 0}
-2
View File
@@ -6,8 +6,6 @@ void bufferInsertChar(int c);
void bufferInsertNewLine();
void bufferDelChar();
void editorSetStatusMessage(const char *fmt, ...);
#endif // EDITOR_OP_H_
+4 -7
View File
@@ -8,18 +8,15 @@
#include <time.h>
#include <unistd.h>
int bufferRowCxToRx(frow *row, int cursor_x);
int bufferRowRxToCx(frow *row, int rx);
void bufferUpdatfrow(frow *row);
void bufferInsertRow(struct buffer_t *buffer, int at, char *s, size_t len);
int editorRowCxToByte(const row_t *row, int cursor_x);
void bufferFrefrow(frow *row);
int editorRowCharCount(row_t *row);
void bufferDelRow(struct buffer_t *buffer, int at);
void editorRowInsertBytes(row_t *row, int at, const char *src, int len);
void editorRowDelByte(row_t *row, int at, int n);
void bufferRowInsertChar(struct buffer_t *buffer, frow *row, int at, int c);
void bufferRowAppendString(struct buffer_t *buffer, frow *row, char *s, size_t len);
+2
View File
@@ -31,4 +31,6 @@ int getCursorPosition(int *rows, int *cols);
int getWindowSize(int *rows, int *cols);
char *key_to_string(int key);
#endif
+16
View File
@@ -0,0 +1,16 @@
//
// Created by Giorgio on 01/05/2026.
//
/* UTF-8 helpers: reading, decoding, encoding and measuring code points. */
#ifndef BELUGA_UTF8_H
#define BELUGA_UTF8_H
#include <stdint.h>
/** Read one UTF-8 encoded character from standard input and return its code
 *  point; a stray continuation byte yields U+FFFD. */
uint32_t readUtf8Char(void);
/** Write the UTF-8 encoding of @p cp into @p buf (up to 4 bytes, no
 *  terminator is added) and return the number of bytes written. */
int utf8Encode(uint32_t cp, char *buf);
/** Return the sequence length announced by lead byte @p c, or 0 when @p c
 *  cannot start a sequence (e.g. it is a continuation byte). */
int utf8Seqlen(unsigned char c);
/** Return the number of terminal columns (0, 1 or 2) used by @p codepoint. */
int codepointWidth(uint32_t codepoint);
/** Decode one code point starting at @p *s and advance @p *s past the bytes
 *  that were consumed; malformed input yields U+FFFD. */
uint32_t utf8Decode(const char** s);
#endif //BELUGA_UTF8_H
Executable → Regular
View File
+2 -1
View File
@@ -22,7 +22,8 @@ src_files = files(
'src/terminal.c',
'src/builtins.c',
'src/buffer.c',
'src/split_screen.c'
'src/split_screen.c',
'src/utf8.c'
)
# Executable
-2
View File
@@ -1,7 +1,5 @@
#include "../include/append_buffer.h"
extern struct editorConfig E;
void abAppend(struct abuf *ab, const char *s, int len) {
char *new = realloc(ab->b, ab->len + len);
-20
View File
@@ -1,5 +1,3 @@
#include <stdarg.h>
#include "../include/editor_op.h"
#include "../include/row_op.h"
#include "include/buffer.h"
@@ -62,21 +60,3 @@ void bufferInsertNewLine() {
fprintf(stderr, "Insert new line done\n");
}
/**
 * Delete the character to the left of the cursor in the active pane.
 *
 * Nothing happens when the cursor sits on the line past the last row or at
 * the very start of the buffer. At column 0 of any other row, the row is
 * appended to the previous one and removed, and the cursor moves to the
 * join point.
 */
void bufferDelChar() {
    EditorPane *pane = splitScreenGetActivePane();
    struct buffer_t *buf = bufferFindById(pane->buffer_id);

    if (pane->cursor_y == buf->numrows) {
        return;
    }
    if (pane->cursor_x == 0 && pane->cursor_y == 0) {
        return;
    }

    frow *current = &buf->row[pane->cursor_y];

    if (pane->cursor_x > 0) {
        /* Plain case: remove the character just before the cursor. */
        bufferRowDelchar(buf, current, pane->cursor_x - 1);
        pane->cursor_x -= 1;
        return;
    }

    /* Start of a row: merge it into the row above. */
    frow *previous = &buf->row[pane->cursor_y - 1];
    pane->cursor_x = previous->size;
    bufferRowAppendString(buf, previous, current->chars, current->size);
    bufferDelRow(buf, pane->cursor_y);
    pane->cursor_y -= 1;
}
+5 -38
View File
@@ -9,53 +9,19 @@
#include "../include/file_io.h"
#include "../include/editor_op.h"
#include "../include/input.h"
#include "include/buffer.h"
#include "include/data.h"
#include "include/split_screen.h"
#include "../include/buffer.h"
#include "../include/data.h"
#include "../include/split_screen.h"
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <errno.h>
extern char *strdup(const char *);
extern ssize_t getline(char **restrict lineptr, size_t *restrict n,
FILE *restrict stream);
extern int ftruncate(int fd, off_t length);
extern struct editorConfig E;
/**
 * @brief Converts all rows of a buffer to a single string buffer
 * @details Concatenates the content of every row into one allocated buffer,
 * writing a newline after each row. The result is NOT NUL-terminated; use
 * @p buffer_len for its size.
 * @param buf Buffer whose rows are serialized
 * @param buffer_len Pointer to integer where total buffer length will be stored
 * @return Pointer to dynamically allocated buffer containing all row data,
 *         or NULL (with *buffer_len set to 0) if the allocation fails.
 * @note Caller is responsible for freeing the returned buffer
 */
char *bufferRowsToString(struct buffer_t *buf, int *buffer_len) {
    int tot_len = 0;
    int j;
    char *buffer;
    char *p;

    for (j = 0; j < buf->numrows; ++j) {
        tot_len += buf->row[j].size + 1; /* +1 for the newline */
    }

    /* Ask for at least one byte so that an empty buffer still yields a
     * valid pointer instead of an implementation-defined malloc(0) result. */
    buffer = malloc(tot_len > 0 ? (size_t)tot_len : 1);
    if (buffer == NULL) {
        *buffer_len = 0;
        return NULL;
    }
    *buffer_len = tot_len;

    p = buffer;
    for (j = 0; j < buf->numrows; ++j) {
        memcpy(p, buf->row[j].chars, buf->row[j].size);
        p += buf->row[j].size;
        *p++ = '\n';
    }
    return buffer;
}
/**
* @brief Closes the current file and resets editor state
* @details Clears all rows, resets cursor position, scroll offsets, and file
@@ -191,3 +157,4 @@ void bufferFind(struct buffer_t *buf) {
}
free(query);
}
+3 -2
View File
@@ -3,7 +3,7 @@
#include "../include/color.h"
#include "../include/data.h"
#include "../include/terminal.h"
#include "include/split_screen.h"
#include "../include/split_screen.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -17,6 +17,7 @@ struct editorConfig;
/**
 * @brief Define a symbol in the editor's Lisp environment for a C function.
 *
 * Calls lisp_env_define() on E.ctx.p->env with a symbol created from
 * @p key_sequence by lisp_make_symbol() and a function object created from
 * @p f by lisp_make_func().
 *
 * @param key_sequence Name given to the symbol.
 * @param f            C function wrapped by lisp_make_func().
 */
void registerBuiltin(char *key_sequence, LispCFunc f) {
lisp_env_define(E.ctx.p->env, lisp_make_symbol(key_sequence, E.ctx),
lisp_make_func(f), E.ctx);
}
void initBuiltins() {
@@ -91,7 +92,7 @@ void initEditor() {
}
E.screenrows -= 2;
// Init graphics variables
splitScreenInit();
EditorPane *active = splitScreenGetActivePane();
+6 -80
View File
@@ -2,16 +2,19 @@
#include "../include/define.h"
#include "../include/editor_op.h"
#include "../include/output.h"
#include "include/data.h"
#include "include/buffer.h"
#include "include/data.h"
#include "include/split_screen.h"
#include <ctype.h>
#include <sys/stat.h>
#include <dirent.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
extern struct editorConfig E;
@@ -78,7 +81,7 @@ const char *file_completion(const char *path) {
strcat(full_path, "/"); // add slash for directories
}
closedir(dir);
return strdup(full_path);
}
}
@@ -162,83 +165,6 @@ char *editorPrompt(char *prompt, char *placeHolder, char bPathMode) {
}
}
/**
 * @brief Converts a key code to its string representation
 * @details Translates raw key codes (special keys, control keys and regular
 * characters) into human-readable strings suitable for display and
 * keybinding configuration. Named keys are looked up first, so TAB and
 * ENTER win over their CTRL-i / CTRL-m aliases.
 * @param key The key code to convert
 * @return Pointer to static buffer containing the string representation.
 *         Examples: "ENTER", "ARROW-UP", "CTRL-a", "TAB", "DEL", "HOME".
 * @note Returns pointer to static buffer; string is overwritten on next call
 * @note Codes that are neither named nor printable are formatted as
 *       "KEY-<number>"
 */
char *key_to_string(int key) {
    static const struct {
        int code;
        const char *name;
    } named_keys[] = {
        {'\r', "ENTER"},
        {'\t', "TAB"},
        {ARROW_UP, "ARROW-UP"},
        {ARROW_DOWN, "ARROW-DOWN"},
        {ARROW_LEFT, "ARROW-LEFT"},
        {ARROW_RIGHT, "ARROW-RIGHT"},
        {PAGE_UP, "PAGE-UP"},
        {PAGE_DOWN, "PAGE-DOWN"},
        {DEL_KEY, "DEL"},
        {BACKSPACE, "BACKSPACE"},
        {'\x1b', "ESCAPE"},
        {BEG_LINE, "HOME"},
        {END_LINE, "END"},
    };
    static char key_str[32];

    for (size_t i = 0; i < sizeof named_keys / sizeof named_keys[0]; ++i) {
        if (named_keys[i].code == key) {
            snprintf(key_str, sizeof(key_str), "%s", named_keys[i].name);
            return key_str;
        }
    }

    if (key >= 1 && key <= 26) { /* CTRL keys */
        snprintf(key_str, sizeof(key_str), "CTRL-%c", 'a' + key - 1);
    } else if (key >= 0 && key <= 0xFF && isprint(key)) {
        /* isprint() is only defined for unsigned-char values, hence the
         * range check before the call. */
        snprintf(key_str, sizeof(key_str), "%c", key);
    } else {
        snprintf(key_str, sizeof(key_str), "KEY-%d", key);
    }
    return key_str;
}
/**
* @brief Moves the cursor based on arrow key input
* @details Updates cursor position (E.cursor_x, E.cursor_y) based on the given
@@ -285,8 +211,8 @@ int editorMoveCursor(int key) {
}
break;
}
return 1;
}
+42 -95
View File
@@ -1,152 +1,99 @@
#include "../include/row_op.h"
#include "../include/data.h"
#include "../include/define.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "include/utf8.h"
extern struct editorConfig E;
/**
 * Translate an index into row->chars into the matching render column.
 * A tab advances the column to the next multiple of
 * E.constantes.TAB_LENGTH; every other byte advances it by one.
 */
int bufferRowCxToRx(frow *row, int cursor_x) {
    int column = 0;

    for (int idx = 0; idx < cursor_x; ++idx) {
        if (row->chars[idx] == '\t') {
            column += E.constantes.TAB_LENGTH - (column % E.constantes.TAB_LENGTH);
        } else {
            column += 1;
        }
    }
    return column;
}
/**
 * Translate a render column back into an index into row->chars.
 * Walks the row while accumulating render width and returns the index of
 * the first byte whose right edge lies beyond rx, or row->size when rx is
 * at or past the end of the row.
 */
int bufferRowRxToCx(frow *row, int rx) {
    int column = 0;
    int idx = 0;

    while (idx < row->size) {
        if (row->chars[idx] == '\t') {
            column += E.constantes.TAB_LENGTH - (column % E.constantes.TAB_LENGTH);
        } else {
            column += 1;
        }
        if (column > rx) {
            break;
        }
        ++idx;
    }
    return idx;
}
/**
 * \fn bufferUpdatfrow(frow *row)
 * \brief Rebuild \p row->render from \p row->chars, expanding tabs.
 *
 * Each tab becomes between 1 and E.constantes.TAB_LENGTH spaces so that the
 * following character starts on a tab stop. The previous render buffer is
 * released. If the new allocation fails, the row is left with a NULL render
 * buffer and an rsize of 0 rather than writing through a null pointer.
 * */
void bufferUpdatfrow(frow *row) {
    int i, i_render;
    int tabs = 0;

    for (i = 0; i < row->size; ++i) {
        if (row->chars[i] == '\t') {
            ++tabs;
        }
    }

    free(row->render);
    /* A tab needs up to TAB_LENGTH cells, i.e. TAB_LENGTH - 1 more than the
     * one byte already counted in row->size; +1 for the terminator. */
    row->render = malloc(row->size + tabs * (E.constantes.TAB_LENGTH - 1) + 1);
    if (row->render == NULL) {
        row->rsize = 0;
        return;
    }

    i_render = 0;
    for (i = 0; i < row->size; ++i) {
        if (row->chars[i] == '\t') {
            row->render[i_render++] = ' ';
            while (i_render % E.constantes.TAB_LENGTH) {
                row->render[i_render++] = ' '; /* pad up to the tab stop */
            }
        } else {
            row->render[i_render++] = row->chars[i];
        }
    }
    row->render[i_render] = '\0';
    row->rsize = i_render;
}
void bufferInsertRow(struct buffer_t *buffer, int at, char *s, size_t len) {
if (at < 0 || at > buffer->numrows) {
return;
}
frow *tmp = (frow *)realloc(buffer->row, sizeof(frow) * (buffer->numrows + 1));
row_t *tmp = (row_t *)realloc(buffer->row, sizeof(row_t) * (buffer->numrows + 1));
if (!tmp) {
return;
}
buffer->row = tmp;
memmove(&buffer->row[at + 1], &buffer->row[at], sizeof(frow) * (buffer->numrows - at));
memmove(&buffer->row[at + 1], &buffer->row[at], sizeof(row_t) * (buffer->numrows - at));
buffer->row[at].size = len;
buffer->row[at].cap = len + 1;
buffer->row[at].chars = malloc(len + 1);
memcpy(buffer->row[at].chars, s, len);
buffer->row[at].chars[len] = '\0';
buffer->row[at].chars[len] = '\n';
buffer->row[at].rsize = 0;
buffer->row[at].render = NULL;
bufferUpdatfrow(&buffer->row[at]);
++buffer->numrows;
++buffer->dirty;
}
void bufferFrefrow(frow *row) {
free(row->render);
void bufferFreeRow(row_t *row) {
free(row->chars);
}
void bufferDelRow(struct buffer_t *buffer, int at) {
if (at < 0 || at >= buffer->numrows) {
return;
int editorRowCxToByte(const row_t *row, int cursor_x) {
int i = 0, col = 0;
while (col < cursor_x && i < row->size) {
int sl = utf8Seqlen((unsigned char)row->chars[i]);
if (sl < 1) sl = 1;
col++;
i += sl;
}
bufferFrefrow(&buffer->row[at]);
memmove(&buffer->row[at], &buffer->row[at + 1], sizeof(frow) * (buffer->numrows - at - 1));
--buffer->numrows;
++buffer->dirty;
return i;
}
/**
* \fn bufferRowInsertChar(frow *row, int at, int c)
* \fn editorRowInsertChar(erow *row, int at, int c)
* \param at Index of where we want to insert the char */
void bufferRowInsertChar(struct buffer_t *buffer, frow *row, int at, int c) {
void bufferRowInsertBytes(struct buffer_t *buffer, row_t *row, int at, char *src, int n) {
if (buffer->state == READ_ONLY)
return;
if (at < 0 || at > row->size) {
at = row->size;
if (row->size + n + 1 > row->cap) {
row->cap = (row->size + n + 1) * 2;
row->chars = realloc(row->chars, row->cap);
}
memmove(row->chars + at + n, row->chars + at, row->size - at);
memcpy(row->chars + at, src, n);
row->size += n;
row->chars = realloc(row->chars, row->size + 2);
memmove(&row->chars[at + 1], &row->chars[at], row->size - at + 1);
++row->size;
row->chars[at] = c;
bufferUpdatfrow(row);
++buffer->dirty;
}
void bufferRowAppendString(struct buffer_t *buffer, frow *row, char *s, size_t len) {
row->chars = realloc(row->chars, row->size + len + 1);
memcpy(&row->chars[row->size], s, len);
row->size += len;
row->chars[row->size] = '\0';
bufferUpdatfrow(row);
++buffer->dirty;
}
/**
* \fn bufferRowDelChar(struct bufferConfig *E, frow *frow, int at)
* \brief Delete the a char at the chosen position on the given row
* \param at Index of the char to delete
* \param row Row on operation is made */
void bufferRowDelchar(struct buffer_t *buffer, frow *row, int at) {
void bufferRowDelByte(struct buffer_t *buffer, row_t *row, int at, int n)
{
if (at < 0 || at >= row->size) {
return;
memmove(row->chars + at, row->chars + at + n, row->size - at - n);
row->size -= n;
row->chars[row->size] = '\0';
}
memmove(&row->chars[at], &row->chars[at + 1], row->size - at);
--row->size;
bufferUpdatfrow(row);
++buffer->dirty;
}
/**
 * Count the characters held in a row.
 * Each step skips the number of bytes reported by utf8Seqlen() for the byte
 * at the current offset; a reported length below 1 is treated as a single
 * byte so the scan always advances.
 */
int editorRowCharCount(row_t *row)
{
    int count = 0;

    for (int offset = 0; offset < row->size; ++count) {
        int step = utf8Seqlen((unsigned char)row->chars[offset]);
        offset += (step >= 1) ? step : 1;
    }
    return count;
}
+95 -63
View File
@@ -1,7 +1,15 @@
#include "../include/terminal.h"
#include "../include/data.h"
#include "../include/define.h"
#include "include/utf8.h"
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
void die(const char *s) {
write(STDOUT_FILENO, "\x1b[2J", 4);
@@ -35,73 +43,97 @@ void enableRawMode() {
}
}
int editorReadKey() {
int nread;
char c;
char seq[3];
while ((nread = read(STDIN_FILENO, &c, 1)) != 1) {
if (nread == -1 && errno != EAGAIN) {
die("read");
}
}
#include <ctype.h> /* isprint */
if (c == '\x1b') {
if (read(STDIN_FILENO, &seq[0], 1) != 1 ||
read(STDIN_FILENO, &seq[1], 1) != 1) {
return '\x1b';
}
if (seq[0] == '[') {
if (seq[1] >= '0' && seq[1] <= '9') {
if (read(STDIN_FILENO, &seq[2], 1) != 1) {
return '\x1b';
}
if (seq[2] == '~') {
switch (seq[1]) {
case '1':
return BEG_LINE;
case '3':
return DEL_KEY;
case '4':
return END_LINE;
case '5':
return PAGE_UP;
case '6':
return PAGE_DOWN;
case '7':
return BEG_LINE;
case '8':
return END_LINE;
}
}
} else {
char *key_to_string(int key) {
static char key_str[32];
switch (seq[1]) {
case 'A':
return ARROW_UP;
case 'B':
return ARROW_DOWN;
case 'C':
return ARROW_RIGHT;
case 'D':
return ARROW_LEFT;
case 'H':
return BEG_LINE;
case 'F':
return END_LINE;
}
}
} else if (seq[0] == 'O') {
switch (seq[1]) {
case 'H':
return BEG_LINE;
case 'F':
return END_LINE;
}
}
return '\x1b';
if (key == '\r') {
strcpy(key_str, "ENTER");
} else if (key >= 1 && key <= 26) {
snprintf(key_str, sizeof(key_str), "CTRL-%c", 'a' + key - 1);
} else {
return c;
switch (key) {
case ARROW_UP: strcpy(key_str, "ARROW-UP"); break;
case ARROW_DOWN: strcpy(key_str, "ARROW-DOWN"); break;
case ARROW_LEFT: strcpy(key_str, "ARROW-LEFT"); break;
case ARROW_RIGHT: strcpy(key_str, "ARROW-RIGHT"); break;
case PAGE_UP: strcpy(key_str, "PAGE-UP"); break;
case PAGE_DOWN: strcpy(key_str, "PAGE-DOWN"); break;
case DEL_KEY: strcpy(key_str, "DEL"); break;
case BACKSPACE: strcpy(key_str, "BACKSPACE"); break;
case BEG_LINE: strcpy(key_str, "HOME"); break;
case END_LINE: strcpy(key_str, "END"); break;
case '\x1b': strcpy(key_str, "ESCAPE"); break;
default:
if (key > 127) {
/* UTF-8 code point — re-encode into the buffer */
char buf[5] = {0};
int n = utf8Encode((uint32_t)key, buf);
snprintf(key_str, sizeof(key_str), "%.*s", n, buf);
} else if (isprint(key)) {
snprintf(key_str, sizeof(key_str), "%c", key);
} else {
snprintf(key_str, sizeof(key_str), "KEY-%d", key);
}
}
}
return key_str;
}
/**
 * @brief Wait for one key press on standard input and return its key code.
 *
 * - Plain ASCII bytes are returned as-is.
 * - Recognised escape sequences ("ESC [ ..." and "ESC O H/F") are mapped to
 *   the editorKey constants; an unrecognised or incomplete sequence yields
 *   a bare '\x1b'.
 * - A multi-byte UTF-8 sequence is decoded and its code point returned;
 *   a byte that cannot start a sequence, or a truncated sequence, yields
 *   U+FFFD.
 *
 * A read error other than EAGAIN/EINTR terminates the program through
 * die(), instead of spinning forever on a dead descriptor.
 *
 * @note Code points and editorKey constants share one int value space, so
 *       the constants must not overlap valid code points.
 * @return The key code as described above.
 */
int editorReadKey() {
    char c;
    ssize_t nread;

    /* First byte: plain ASCII, ESC, or the lead byte of a UTF-8 sequence. */
    while ((nread = read(STDIN_FILENO, &c, 1)) != 1) {
        if (nread == -1 && errno != EAGAIN && errno != EINTR) {
            die("read");
        }
    }

    if (c == '\x1b') {
        char seq[3];

        if (read(STDIN_FILENO, &seq[0], 1) != 1) return '\x1b';
        if (read(STDIN_FILENO, &seq[1], 1) != 1) return '\x1b';

        if (seq[0] == '[') {
            if (seq[1] >= '0' && seq[1] <= '9') {
                /* "ESC [ <digit> ~" family */
                if (read(STDIN_FILENO, &seq[2], 1) != 1) return '\x1b';
                if (seq[2] == '~') {
                    switch (seq[1]) {
                    case '1': return BEG_LINE;
                    case '3': return DEL_KEY;
                    case '4': return END_LINE;
                    case '5': return PAGE_UP;
                    case '6': return PAGE_DOWN;
                    case '7': return BEG_LINE;
                    case '8': return END_LINE;
                    default: break;
                    }
                }
            } else {
                switch (seq[1]) {
                case 'A': return ARROW_UP;
                case 'B': return ARROW_DOWN;
                case 'C': return ARROW_RIGHT;
                case 'D': return ARROW_LEFT;
                case 'H': return BEG_LINE;
                case 'F': return END_LINE;
                default: break;
                }
            }
        } else if (seq[0] == 'O') {
            /* Some terminals send Home/End as "ESC O H" / "ESC O F". */
            switch (seq[1]) {
            case 'H': return BEG_LINE;
            case 'F': return END_LINE;
            default: break;
            }
        }
        return '\x1b';
    }

    int seqlen = utf8Seqlen((unsigned char)c);
    if (seqlen == 0) {
        /* Not a valid lead byte: report it as the replacement character
         * rather than letting the raw byte pass for a code point. */
        return 0xFFFD;
    }
    if (seqlen > 1) {
        /* One spare byte keeps the buffer NUL-terminated for the decoder. */
        char buf[5] = {c, 0, 0, 0, 0};
        for (int i = 1; i < seqlen && i < 4; i++) {
            if (read(STDIN_FILENO, &buf[i], 1) != 1) break;
        }
        const char *p = buf;
        return (int)utf8Decode(&p); /* caller re-encodes when inserting */
    }
    return (unsigned char)c;
}
int getCursorPosition(int *rows, int *cols) {
+148
View File
@@ -0,0 +1,148 @@
/**
* @file utf8.c
*/
#include "../include/utf8.h"
#include "../include/data.h"
#include <stdint.h>
#include <unistd.h>
/**
 * @brief Read one UTF-8 encoded character from standard input.
 *
 * Bytes are read one at a time so that a short read cannot leave part of
 * the sequence undecoded. U+FFFD is returned when:
 *  - no byte could be read (end of input or read error),
 *  - the first byte is a continuation byte or is 0xF8 or above,
 *  - a following byte is missing or is not a continuation byte.
 *
 * A byte that breaks a sequence has already been consumed and is dropped.
 * Overlong forms and surrogates are not rejected here.
 *
 * @return The decoded code point, or 0xFFFD on any of the failures above.
 */
uint32_t readUtf8Char(void)
{
    unsigned char lead;
    if (read(STDIN_FILENO, &lead, 1) != 1)
        return 0xFFFD;

    uint32_t cp;
    int extra;
    if (lead < 0x80)
    {
        return lead;
    }
    else if (lead < 0xC0)
    {
        return 0xFFFD; /* stray continuation */
    }
    else if (lead < 0xE0)
    {
        cp = lead & 0x1F;
        extra = 1;
    }
    else if (lead < 0xF0)
    {
        cp = lead & 0x0F;
        extra = 2;
    }
    else if (lead < 0xF8)
    {
        cp = lead & 0x07;
        extra = 3;
    }
    else
    {
        return 0xFFFD; /* 0xF8..0xFF never appear in UTF-8 */
    }

    for (int i = 0; i < extra; i++)
    {
        unsigned char cont;
        if (read(STDIN_FILENO, &cont, 1) != 1)
            return 0xFFFD; /* sequence cut short */
        if ((cont & 0xC0) != 0x80)
            return 0xFFFD; /* not a continuation byte */
        cp = (cp << 6) | (cont & 0x3F);
    }
    return cp;
}
/**
 * @brief Decode one code point from a UTF-8 byte string.
 *
 * @param s In/out cursor. On return it points just past the bytes that were
 *          consumed. When a byte that should be a continuation is not one,
 *          the cursor is left ON that byte so the caller can resynchronise
 *          there (this also stops the decoder at a terminating NUL).
 * @return The decoded code point, or U+FFFD when the input is malformed:
 *         a stray continuation byte, a lead byte of 0xF8 or above, a
 *         truncated sequence, an overlong form, a surrogate
 *         (U+D800..U+DFFF) or a value above U+10FFFF.
 */
uint32_t utf8Decode(const char** s)
{
    /* Smallest code point that legitimately needs 1, 2, 3 or 4 bytes;
     * anything below it in a longer form is an overlong encoding. */
    static const uint32_t min_for_len[4] = {0x0, 0x80, 0x800, 0x10000};

    const unsigned char *p = (const unsigned char *)*s;
    unsigned char lead = *p++;
    uint32_t cp;
    int extra;

    if (lead < 0x80)
    {
        cp = lead;
        extra = 0;
    }
    else if (lead < 0xC0 || lead >= 0xF8)
    {
        /* continuation byte in lead position, or a byte UTF-8 never uses */
        *s = (const char *)p;
        return 0xFFFD;
    }
    else if (lead < 0xE0)
    {
        cp = lead & 0x1F;
        extra = 1;
    }
    else if (lead < 0xF0)
    {
        cp = lead & 0x0F;
        extra = 2;
    }
    else
    {
        cp = lead & 0x07;
        extra = 3;
    }

    for (int i = 0; i < extra; i++)
    {
        if ((*p & 0xC0) != 0x80)
        {
            *s = (const char *)p;
            return 0xFFFD;
        }
        cp = (cp << 6) | (uint32_t)(*p & 0x3F);
        p++;
    }
    *s = (const char *)p;

    if (cp < min_for_len[extra] || cp > 0x10FFFF ||
        (cp >= 0xD800 && cp <= 0xDFFF))
        return 0xFFFD;
    return cp;
}
/**
 * @brief Encode one code point as UTF-8.
 *
 * Values that are not Unicode scalar values (surrogates U+D800..U+DFFF and
 * anything above U+10FFFF) cannot be represented in UTF-8; they are encoded
 * as U+FFFD so the output is always well-formed.
 *
 * @param cp  Code point to encode.
 * @param buf Destination; must have room for at least 4 bytes. No
 *            terminator is written.
 * @return Number of bytes written (1 to 4).
 */
int utf8Encode(uint32_t cp, char* buf)
{
    if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
        cp = 0xFFFD;

    if (cp < 0x80)
    {
        buf[0] = (char)cp;
        return 1;
    }
    if (cp < 0x800)
    {
        buf[0] = (char)(0xC0 | (cp >> 6));
        buf[1] = (char)(0x80 | (cp & 0x3F));
        return 2;
    }
    if (cp < 0x10000)
    {
        buf[0] = (char)(0xE0 | (cp >> 12));
        buf[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
        buf[2] = (char)(0x80 | (cp & 0x3F));
        return 3;
    }
    buf[0] = (char)(0xF0 | (cp >> 18));
    buf[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
    buf[2] = (char)(0x80 | ((cp >> 6) & 0x3F));
    buf[3] = (char)(0x80 | (cp & 0x3F));
    return 4;
}
/**
 * @brief Length of the UTF-8 sequence announced by a lead byte.
 *
 * @param c First byte of a sequence.
 * @return 1 to 4 for a byte that can start a sequence, or 0 when it cannot:
 *         continuation bytes (0x80..0xBF) and 0xF8..0xFF, which never occur
 *         in UTF-8.
 */
int utf8Seqlen(unsigned char c)
{
    if (c < 0x80) return 1;
    if (c < 0xC0) return 0; /* continuation — shouldn't be leading */
    if (c < 0xE0) return 2;
    if (c < 0xF0) return 3;
    if (c < 0xF8) return 4;
    return 0;               /* 0xF8..0xFF are not valid lead bytes */
}
/** Inclusive range of code points. */
struct cpRange {
    uint32_t first;
    uint32_t last;
};

/** Return 1 when @p cp falls inside one of the @p count ranges, else 0. */
static int cpInRanges(uint32_t cp, const struct cpRange *ranges, int count)
{
    for (int i = 0; i < count; i++) {
        if (cp >= ranges[i].first && cp <= ranges[i].last)
            return 1;
    }
    return 0;
}

/**
 * @brief Number of terminal columns a code point occupies.
 *
 * This is an approximation based on fixed ranges, not a full
 * East Asian Width table.
 *
 * @param codepoint Unicode code point of a character
 * @return 0 for control characters, combining marks and zero-width
 *         characters; 2 for East Asian wide / fullwidth characters and
 *         emoji; 1 otherwise.
 */
int codepointWidth(uint32_t codepoint)
{
    static const struct cpRange zero_width[] = {
        {0x0000, 0x001F},   /* C0 controls */
        {0x007F, 0x009F},   /* DEL and C1 controls */
        {0x0300, 0x036F},   /* combining diacritical marks */
        {0x200B, 0x200F},   /* zero-width space/joiners, direction marks */
        {0xFE00, 0xFE0F},   /* variation selectors */
    };
    static const struct cpRange wide[] = {
        {0x1100, 0x115F},   /* Hangul Jamo initial consonants */
        {0x2E80, 0x303E},   /* CJK radicals, symbols and punctuation */
        {0x3041, 0xA4CF},   /* Kana .. CJK Unified Ideographs .. Yi */
        {0xAC00, 0xD7AF},   /* Hangul syllables */
        {0xF900, 0xFAFF},   /* CJK compatibility ideographs */
        {0xFE30, 0xFE6F},   /* CJK compatibility and small forms */
        {0xFF01, 0xFF60},   /* fullwidth forms */
        {0xFFE0, 0xFFE6},   /* fullwidth signs */
        {0x1F300, 0x1FAFF}, /* emoji and pictographs */
        {0x20000, 0x3FFFD}, /* supplementary ideographic planes */
    };

    if (cpInRanges(codepoint, zero_width,
                   (int)(sizeof zero_width / sizeof zero_width[0])))
        return 0;
    if (cpInRanges(codepoint, wide, (int)(sizeof wide / sizeof wide[0])))
        return 2;
    return 1;
}