#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// One UTF-8 encoded character, stored as its raw byte sequence.
typedef struct utf8_char {
    // Bytes of the encoded character. As produced by parse_to_utf8() and
    // copy_utf8_char() this is a malloc'd buffer of exactly `length` bytes
    // with no NUL terminator; free_utf8_str() releases it with free().
    char *bytes;
    // Number of bytes stored in `bytes`.
    size_t length;
} utf8_char;

// Classification of a single byte of UTF-8 text, as returned by
// get_utf8_byte_type().
//
// The enumerators take their implicit values 0..5 from declaration order, so
// head_double_byte, head_triple_byte and head_quad_byte evaluate to 2, 3 and
// 4: the total number of bytes in the sequence each of them introduces.
// Keep this order if any code uses the enumerator as a byte count.
typedef enum {
    single_ASCII_byte, // 0xxxxxxx: complete one-byte character
    trailing_byte,     // 10xxxxxx: continuation byte of a multi-byte sequence
    head_double_byte,  // 110xxxxx: first byte of a 2-byte sequence
    head_triple_byte,  // 1110xxxx: first byte of a 3-byte sequence
    head_quad_byte,    // 11110xxx: first byte of a 4-byte sequence
    invalid_utf8_byte, // 11111xxx: matches none of the patterns above
} utf8_byte_type;

// Classify one byte by the run of 1-bits at its most significant end.
//
// The bits are inspected from the top down; the position of the first 0-bit
// decides the class:
//   0xxxxxxx -> single_ASCII_byte
//   10xxxxxx -> trailing_byte
//   110xxxxx -> head_double_byte
//   1110xxxx -> head_triple_byte
//   11110xxx -> head_quad_byte
//   11111xxx -> invalid_utf8_byte
//
// Only the bit pattern is examined; lead bytes are not range-checked any
// further (for example 0xC0 is reported as head_double_byte).
utf8_byte_type get_utf8_byte_type(unsigned char input) {
    if ((input & 0x80) == 0) {
        return single_ASCII_byte;
    }
    // Bit 7 is set from here on; each test below peels off one more leading 1.
    if ((input & 0x40) == 0) {
        return trailing_byte;
    }
    if ((input & 0x20) == 0) {
        return head_double_byte;
    }
    if ((input & 0x10) == 0) {
        return head_triple_byte;
    }
    if ((input & 0x08) == 0) {
        return head_quad_byte;
    }
    return invalid_utf8_byte;
}

// Append one character to the end of a utf8_char array.
//
// utf8_str         Pointer to the array pointer; the array itself is written
//                  to but never reallocated here, so the caller must make
//                  sure slot *utf8_str_length already exists.
// utf8_str_length  In: number of characters currently stored.
//                  Out: incremented by one.
// byte_buffer      Bytes of the new character. The pointer is stored as-is
//                  (no copy is made).
// buf_length       Number of bytes in byte_buffer.
void push_byte_buffer_to_utf8_str(utf8_char **utf8_str, size_t *utf8_str_length, char *byte_buffer, int buf_length) {
    const size_t slot_index = *utf8_str_length;
    utf8_char *slot = *utf8_str + slot_index;

    slot->bytes = byte_buffer;
    slot->length = buf_length;

    *utf8_str_length = slot_index + 1;
}

// Release a utf8_char array together with the byte buffer of each element.
//
// str     Array to release; it is passed to free() after its elements.
// length  Number of elements whose `bytes` buffer is freed. Values <= 0
//         free only the array itself.
void free_utf8_str(utf8_char *str, int length) {
    utf8_char *cursor = str;
    int remaining = length;

    while (remaining > 0) {
        free(cursor->bytes);
        cursor++;
        remaining--;
    }
    free(str);
}

// Split a NUL-terminated byte string into an array of UTF-8 characters.
//
// Every element of the resulting array owns a malloc'd buffer holding the raw
// bytes of one character (1 to 4 bytes, not NUL-terminated). Bytes are
// classified with get_utf8_byte_type(); no validation beyond that
// classification is performed.
//
// input          NUL-terminated string to parse.
// output_string  On success receives the newly allocated array (never NULL,
//                even for an empty input). Release it with free_utf8_str().
//                Not written on failure.
// output_length  On success receives the number of characters in the array.
//                Not written on failure.
//
// Returns 0 on success. Returns 1, with everything allocated here already
// freed, when:
//   - a continuation byte appears without a lead byte,
//   - a byte is classified as invalid_utf8_byte,
//   - a lead byte is not followed by enough continuation bytes (including
//     the case where the string ends in the middle of a sequence),
//   - a memory allocation fails.
int parse_to_utf8(char *input, utf8_char **output_string, size_t *output_length) {
    const size_t input_str_length = strlen(input);

    // One slot per input byte is the worst case (pure ASCII). Ask for at
    // least one slot so malloc(0) returning NULL is never mistaken for an
    // allocation failure on empty input.
    const size_t capacity = input_str_length > 0 ? input_str_length : 1;
    utf8_char *output_str = malloc(capacity * sizeof *output_str);
    if (output_str == NULL) {
        return 1;
    }
    size_t utf8_str_len = 0;

    size_t i = 0;
    while (i < input_str_length) {
        // Total number of bytes in the character that starts at input[i].
        size_t char_length;
        switch (get_utf8_byte_type((unsigned char)input[i])) {
        case single_ASCII_byte:
            char_length = 1;
            break;
        case head_double_byte:
            char_length = 2;
            break;
        case head_triple_byte:
            char_length = 3;
            break;
        case head_quad_byte:
            char_length = 4;
            break;
        default:
            // A continuation byte without a lead byte, or an invalid byte.
            goto fail;
        }

        char *byte_buffer = malloc(char_length);
        if (byte_buffer == NULL) {
            goto fail;
        }
        byte_buffer[0] = input[i];

        // Copy the continuation bytes. The terminating NUL is classified as
        // single_ASCII_byte, so a sequence cut short by the end of the string
        // fails this check too, and the loop never reads past the terminator.
        for (size_t k = 1; k < char_length; k++) {
            const unsigned char next = (unsigned char)input[i + k];
            if (get_utf8_byte_type(next) != trailing_byte) {
                free(byte_buffer);
                goto fail;
            }
            byte_buffer[k] = (char)next;
        }

        push_byte_buffer_to_utf8_str(&output_str, &utf8_str_len, byte_buffer, (int)char_length);
        i += char_length;
    }

    if (utf8_str_len > 0 && utf8_str_len < input_str_length) {
        // Shrink the array to the number of characters actually stored. The
        // size is in bytes, hence the multiplication by the element size.
        utf8_char *shrunk = realloc(output_str, utf8_str_len * sizeof *output_str);
        if (shrunk != NULL) {
            output_str = shrunk;
        }
        // If realloc fails the original, larger block is still valid; keep it.
    }

    *output_length = utf8_str_len;
    *output_string = output_str;
    return 0;

fail:
    // Frees the byte buffers of the characters stored so far, then the array.
    free_utf8_str(output_str, utf8_str_len);
    return 1;
}

// Report whether two characters consist of exactly the same bytes.
//
// Returns true when both have the same length and every byte matches,
// false otherwise. Two zero-length characters compare equal.
bool utf8_char_eq(utf8_char *a, utf8_char *b) {
    const size_t byte_count = a->length;
    if (byte_count != b->length) {
        return false;
    }

    const char *lhs = a->bytes;
    const char *rhs = b->bytes;
    size_t remaining = byte_count;
    while (remaining > 0) {
        if (*lhs != *rhs) {
            return false;
        }
        lhs++;
        rhs++;
        remaining--;
    }
    return true;
}

// Write every byte of every character in the array to stdout, in order.
//
// str     Array of characters to print.
// length  Number of elements in str.
//
// No separator or newline is added and the stream is not flushed.
void print_utf8_str(utf8_char *str, size_t length) {
    for (size_t i = 0; i < length; i++) {
        const utf8_char *current = &str[i];
        for (size_t j = 0; j < current->length; j++) {
            putc(current->bytes[j], stdout);
        }
    }
}

// Create an independent copy of a character.
//
// from  Character to duplicate; it is only read.
//
// Returns a utf8_char whose `bytes` is a freshly malloc'd buffer containing
// the same bytes as `from`; the caller releases it with free().
// If `from` is empty, or if the allocation fails, the result has
// bytes == NULL and length == 0 (safe to pass to free()).
utf8_char copy_utf8_char(utf8_char *from) {
    utf8_char to_char = { .bytes = NULL, .length = 0 };

    const size_t byte_count = from->length;
    if (byte_count == 0) {
        return to_char;
    }

    char *bytes = malloc(byte_count);
    if (bytes == NULL) {
        // Report failure as an empty character instead of writing through NULL.
        return to_char;
    }
    memcpy(bytes, from->bytes, byte_count);

    to_char.bytes = bytes;
    to_char.length = byte_count;
    return to_char;
}