Initial Commit
This commit is contained in:
commit
41d574f45f
2
.clang-format
Executable file
2
.clang-format
Executable file
|
@ -0,0 +1,2 @@
|
||||||
|
ColumnLimit: 120
|
||||||
|
IndentWidth: 4
|
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
acaesar
|
20
README.md
Normal file
20
README.md
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
# ACaesar - Caesar cipher transcoder
|
||||||
|
|
||||||
|
This program encodes and decodes messages using a [Caesar Cipher](https://en.wikipedia.org/wiki/Caesar_cipher).
|
||||||
|
|
||||||
|
```
|
||||||
|
Usage: acaesar [options] message
|
||||||
|
Options:
|
||||||
|
--help Print this help message
|
||||||
|
-o <offset> Set the integer offset used in transcoding
|
||||||
|
-c <character set> Set the character set
|
||||||
|
--xyz Set the character set to the lowercase alphabet (default)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Compiling
|
||||||
|
|
||||||
|
The program is fairly simple, so it should compile with any modern C compiler. It was developed using GCC on Ubuntu. A makefile is included; its single `build` target runs the following command:
|
||||||
|
|
||||||
|
```
|
||||||
|
gcc acaesar.c utf8hack.c -O3 -Wall -Wextra -o acaesar
|
||||||
|
```
|
160
acaesar.c
Executable file
160
acaesar.c
Executable file
|
@ -0,0 +1,160 @@
|
||||||
|
|
||||||
|
#include "utf8hack.h"
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
int get_index_of_utf8_char(utf8_char *utf8_string, size_t string_length, utf8_char search_char);
|
||||||
|
|
||||||
|
void transcode(long offset, char *charset_str, char *message_str);
|
||||||
|
|
||||||
|
#define HELP_MESSAGE \
|
||||||
|
"ACaesar - Caesar Cipher transcoder version 0.1\n\
|
||||||
|
Usage: acaesar [options] message\n\
|
||||||
|
Options:\n\
|
||||||
|
--help Print this help message\n\
|
||||||
|
-o <offset> Set the integer offset used in transcoding\n\
|
||||||
|
-c <character set> Set the character set\n\
|
||||||
|
--xyz Set the character set to the lowercase alphabet (default)\n"
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
|
bool charset_set = false;
|
||||||
|
bool message_set = false;
|
||||||
|
bool offset_set = false;
|
||||||
|
bool alloc_charset = false;
|
||||||
|
long offset;
|
||||||
|
char *charset;
|
||||||
|
char *message;
|
||||||
|
|
||||||
|
// Argument parsing
|
||||||
|
for (int i = 1; i < argc; i++) {
|
||||||
|
|
||||||
|
char *current_arg = argv[i];
|
||||||
|
// Help flag
|
||||||
|
if (strcmp(current_arg, "--help") == 0) {
|
||||||
|
printf(HELP_MESSAGE);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
// Charset flag
|
||||||
|
if (strcmp(current_arg, "-c") == 0) {
|
||||||
|
if (i == argc - 1) {
|
||||||
|
fprintf(stderr, "Error, charset (-c) flag found without following charset.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (charset_set) {
|
||||||
|
fprintf(stderr, "Error, character set specified twice.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
char *next_arg = argv[i + 1];
|
||||||
|
size_t size = strlen(next_arg);
|
||||||
|
charset = malloc(size * sizeof(char) + 1);
|
||||||
|
strcpy(charset, next_arg);
|
||||||
|
charset_set = true;
|
||||||
|
i++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Offset flag
|
||||||
|
if (strcmp(current_arg, "-o") == 0) {
|
||||||
|
if (i == argc - 1) {
|
||||||
|
fprintf(stderr, "Error, offset (-o) flag found without following offset.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (offset_set) {
|
||||||
|
fprintf(stderr, "Error, offset (-o) specified twice.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
char *next_arg = argv[i + 1];
|
||||||
|
offset = strtol(next_arg, NULL, 10);
|
||||||
|
offset_set = true;
|
||||||
|
i++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// alphabetical charset flag. (default anyways). Only exists for completeness.
|
||||||
|
if (strcmp(current_arg, "--xyz") == 0) {
|
||||||
|
if (charset_set) {
|
||||||
|
fprintf(stderr, "Error, character set specified twice.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
charset = "abcdefghijklmnopqrstuvwxyz";
|
||||||
|
charset_set = true;
|
||||||
|
alloc_charset = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (i != argc - 1) {
|
||||||
|
// Unrecognized flag
|
||||||
|
fprintf(stderr, "Error, unknown argument found: '%s'\n", current_arg);
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
// Message to be transcoded.
|
||||||
|
// Last argument is assumed to be message.
|
||||||
|
size_t size = strlen(current_arg);
|
||||||
|
message = malloc(size * sizeof(char) + 1);
|
||||||
|
strcpy(message, current_arg);
|
||||||
|
message_set = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!charset_set) {
|
||||||
|
// Default to a...z when no charset is found.
|
||||||
|
charset = "abcdefghijklmnopqrstuvwxyz";
|
||||||
|
charset_set = true;
|
||||||
|
}
|
||||||
|
if (!offset_set) {
|
||||||
|
fprintf(stderr, "Error, no offset found. Use '-o' to set one.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (!message_set) {
|
||||||
|
fprintf(stderr, "Error, no message found to transcode.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
transcode(offset, charset, message);
|
||||||
|
|
||||||
|
// Clean up
|
||||||
|
free(message);
|
||||||
|
// In some cases, charset is malloc'd and in others it is set to be a string literal.
|
||||||
|
// String literals can not bee freed so `alloc_charset` is used to determine when charset should be freed
|
||||||
|
if (alloc_charset)
|
||||||
|
free(charset);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Linear search for `search_char` in the first `string_length` elements of `utf8_string`.
// Returns the position of the first element that utf8_char_eq reports as equal, or -1 when there is none.
int get_index_of_utf8_char(utf8_char *utf8_string, size_t string_length, utf8_char search_char) {
    size_t position = 0;
    while (position < string_length) {
        if (utf8_char_eq(utf8_string + position, &search_char)) {
            return position;
        }
        position++;
    }
    return -1;
}
|
||||||
|
|
||||||
|
// Error handling would be nice for this function.
|
||||||
|
void transcode(long offset, char *charset_str, char *message_str) {
|
||||||
|
size_t charset_length;
|
||||||
|
utf8_char *charset;
|
||||||
|
parse_to_utf8(charset_str, &charset, &charset_length);
|
||||||
|
size_t message_length;
|
||||||
|
utf8_char *message;
|
||||||
|
parse_to_utf8(message_str, &message, &message_length);
|
||||||
|
|
||||||
|
utf8_char *transcoded_message = malloc(message_length * sizeof(utf8_char));
|
||||||
|
|
||||||
|
for (size_t i = 0; i < message_length; i++) {
|
||||||
|
// A hashmap would likely be more efficient for larger messages, but would introduce much complexity.
|
||||||
|
int a = get_index_of_utf8_char(charset, charset_length, message[i]);
|
||||||
|
if (a != -1) {
|
||||||
|
transcoded_message[i] = copy_utf8_char(&charset[(a + offset) % charset_length]);
|
||||||
|
} else {
|
||||||
|
// If character is not in character set, copy it over unchanged.
|
||||||
|
transcoded_message[i] = copy_utf8_char(&message[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
print_utf8_str(transcoded_message, message_length);
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
|
free_utf8_str(charset, charset_length);
|
||||||
|
free_utf8_str(message, message_length);
|
||||||
|
free_utf8_str(transcoded_message, message_length);
|
||||||
|
}
|
2
makefile
Normal file
2
makefile
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
build:
|
||||||
|
gcc acaesar.c utf8hack.c -O3 -Wall -Wextra -o acaesar
|
146
utf8hack.c
Normal file
146
utf8hack.c
Normal file
|
@ -0,0 +1,146 @@
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
// One UTF-8 encoded character, stored as a byte sequence with an explicit length.
// Must stay identical to the definition in utf8hack.h.
typedef struct utf8_char {
    // The encoded bytes; not NUL-terminated. Released by free_utf8_str.
    char *bytes;
    // Number of bytes stored in `bytes`.
    size_t length;
} utf8_char;
|
||||||
|
|
||||||
|
// Classification of a single byte within a UTF-8 stream.
//
// The numeric values are significant: parse_to_utf8 uses the value of a head byte type as the number of
// bytes in the sequence it introduces, so they are spelled out explicitly here. Do not reorder or renumber.
typedef enum {
    single_ASCII_byte = 0, // 0xxxxxxx
    trailing_byte = 1,     // 10xxxxxx, continuation of a multi-byte sequence
    head_double_byte = 2,  // 110xxxxx, first byte of a 2-byte sequence
    head_triple_byte = 3,  // 1110xxxx, first byte of a 3-byte sequence
    head_quad_byte = 4,    // 11110xxx, first byte of a 4-byte sequence
    invalid_utf8_byte = 5, // 11111xxx, never valid
} utf8_byte_type;
|
||||||
|
|
||||||
|
utf8_byte_type get_utf8_byte_type(unsigned char input) {
|
||||||
|
|
||||||
|
if ((input & 0b10000000) == 0b10000000) {
|
||||||
|
|
||||||
|
if ((input & 0b11000000) == 0b10000000) {
|
||||||
|
return trailing_byte;
|
||||||
|
}
|
||||||
|
if ((input & 0b11100000) == 0b11000000) {
|
||||||
|
return head_double_byte;
|
||||||
|
}
|
||||||
|
if ((input & 0b11110000) == 0b11100000) {
|
||||||
|
return head_triple_byte;
|
||||||
|
}
|
||||||
|
if ((input & 0b11111000) == 0b11110000) {
|
||||||
|
return head_quad_byte;
|
||||||
|
}
|
||||||
|
return invalid_utf8_byte;
|
||||||
|
}
|
||||||
|
return single_ASCII_byte;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stores `byte_buffer` (the pointer itself, not a copy) and `buf_length` in the element at index
// `*utf8_str_length` of `*utf8_str`, then increments `*utf8_str_length`.
// The array is not grown here; the caller must have allocated room for the new element.
void push_byte_buffer_to_utf8_str(utf8_char **utf8_str, size_t *utf8_str_length, char *byte_buffer, int buf_length) {
    utf8_char *slot = &(*utf8_str)[*utf8_str_length];

    slot->bytes = byte_buffer;
    slot->length = buf_length;

    *utf8_str_length += 1;
}
|
||||||
|
|
||||||
|
// Releases the byte buffer of each of the first `length` elements of `str`, then the array itself.
void free_utf8_str(utf8_char *str, int length) {
    int index = 0;
    while (index < length) {
        free(str[index].bytes);
        index++;
    }
    free(str);
}
|
||||||
|
|
||||||
|
/**
 * Splits the NUL-terminated byte string `input` into an array of UTF-8 characters.
 *
 * On success returns 0 and stores a newly allocated array in `*output_string` and its element count in
 * `*output_length`; the caller releases it with free_utf8_str.
 *
 * Returns 1, leaving both outputs untouched and releasing everything allocated so far, when:
 *   - a byte is not a valid UTF-8 head byte (stray continuation byte or 11111xxx),
 *   - a multi-byte sequence is truncated or contains a non-continuation byte,
 *   - memory allocation fails.
 *
 * Only the byte structure is checked; overlong or out-of-range encodings are accepted.
 */
int parse_to_utf8(char *input, utf8_char **output_string, size_t *output_length) {
    const size_t input_str_length = strlen(input);

    // One element per input byte is an upper bound on the number of characters. The extra element keeps the
    // request non-zero for an empty input, so a NULL result always means the allocation failed.
    utf8_char *output_str = malloc((input_str_length + 1) * sizeof *output_str);
    if (output_str == NULL) {
        return 1;
    }
    size_t utf8_str_len = 0;

    size_t i = 0;
    while (i < input_str_length) {
        const utf8_byte_type byte_type = get_utf8_byte_type((unsigned char)input[i]);

        size_t sequence_length;
        switch (byte_type) {
        case single_ASCII_byte:
            sequence_length = 1;
            break;
        case head_double_byte:
        case head_triple_byte:
        case head_quad_byte:
            // The enumerator value of a head byte type equals the length of its sequence.
            sequence_length = (size_t)byte_type;
            break;
        case trailing_byte: // A trailing byte without a head byte is invalid.
        case invalid_utf8_byte:
        default:
            free_utf8_str(output_str, (int)utf8_str_len);
            return 1;
        }

        char *byte_buffer = malloc(sequence_length);
        if (byte_buffer == NULL) {
            free_utf8_str(output_str, (int)utf8_str_len);
            return 1;
        }

        // Insert heading byte into buffer
        byte_buffer[0] = input[i];
        // Insert following bytes into buffer. The terminating NUL is not a trailing byte, so a truncated
        // sequence is rejected here before any read past the end of `input` can happen.
        for (size_t k = 1; k < sequence_length; k++) {
            if (get_utf8_byte_type((unsigned char)input[i + k]) != trailing_byte) {
                free(byte_buffer); // Not yet owned by output_str, so it must be released separately.
                free_utf8_str(output_str, (int)utf8_str_len);
                return 1;
            }
            byte_buffer[k] = input[i + k];
        }

        push_byte_buffer_to_utf8_str(&output_str, &utf8_str_len, byte_buffer, (int)sequence_length);
        i += sequence_length;
    }

    // Shrink output string to proper size. The size is in bytes, hence the multiplication by the element
    // size. A failed shrink is harmless: the original, larger allocation is still valid.
    if (utf8_str_len > 0) {
        utf8_char *shrunk = realloc(output_str, utf8_str_len * sizeof *shrunk);
        if (shrunk != NULL) {
            output_str = shrunk;
        }
    }

    *output_length = utf8_str_len;
    *output_string = output_str;
    return 0;
}
|
||||||
|
|
||||||
|
// Reports whether `a` and `b` hold the same byte sequence: equal lengths and identical bytes.
bool utf8_char_eq(utf8_char *a, utf8_char *b) {
    const size_t byte_count = a->length;
    if (byte_count != b->length) {
        return false;
    }

    // Advance while the bytes agree; reaching the end means no mismatch was found.
    size_t matched = 0;
    while (matched < byte_count && a->bytes[matched] == b->bytes[matched]) {
        matched++;
    }
    return matched == byte_count;
}
|
||||||
|
|
||||||
|
// Writes the bytes of the first `length` characters of `str` to stdout, in order.
// No separator or trailing newline is added.
void print_utf8_str(utf8_char *str, size_t length) {
    for (size_t i = 0; i < length; i++) {
        for (size_t j = 0; j < str[i].length; j++) {
            putc(str[i].bytes[j], stdout);
        }
    }
}
|
||||||
|
|
||||||
|
// Returns a copy of `*from` whose bytes live in a freshly malloc'd buffer of `from->length` bytes.
// The result of malloc is not checked.
utf8_char copy_utf8_char(utf8_char *from) {
    const size_t byte_count = from->length;
    char *duplicate = malloc(byte_count * sizeof(char));

    size_t k = 0;
    while (k < byte_count) {
        duplicate[k] = from->bytes[k];
        k++;
    }

    utf8_char to_char = {.bytes = duplicate, .length = byte_count};
    return to_char;
}
|
19
utf8hack.h
Normal file
19
utf8hack.h
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
#ifndef UTF8_HACK
|
||||||
|
#define UTF8_HACK
|
||||||
|
// hacky implementation of UTF-8 parsing. Doesn't do much validation.
|
||||||
|
// Primary purpose is to separate bytes into unicode codepoints.
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
// One UTF-8 encoded character, stored as a byte sequence with an explicit length.
typedef struct utf8_char {
    // The encoded bytes; not NUL-terminated.
    char *bytes;
    // Number of bytes stored in `bytes`.
    size_t length;
} utf8_char;
|
||||||
|
|
||||||
|
// Returns true when `a` and `b` have the same length and identical bytes.
bool utf8_char_eq(utf8_char *a, utf8_char *b);
|
||||||
|
// Splits the NUL-terminated `input` into an array of utf8_char. Returns 0 on success, storing the newly
// allocated array in `*output_string` and its element count in `*output_length`; release the array with
// free_utf8_str. Returns 1 without writing the outputs when a byte that cannot start a character is found.
int parse_to_utf8(char *input, utf8_char **output_string, size_t *output_length);
|
||||||
|
// Writes the bytes of the first `length` characters of `str` to stdout; no newline is appended.
void print_utf8_str(utf8_char *str, size_t length);
|
||||||
|
// Frees the `bytes` buffer of each of the first `length` elements of `str`, then frees `str` itself.
void free_utf8_str(utf8_char *str, int length);
|
||||||
|
// Returns a copy of `*from` whose `bytes` buffer is newly malloc'd.
utf8_char copy_utf8_char(utf8_char *from);
|
||||||
|
|
||||||
|
#endif // UTF8_HACK
|
Loading…
Reference in a new issue