Initial Commit
This commit is contained in:
commit
41d574f45f
2
.clang-format
Executable file
2
.clang-format
Executable file
|
@ -0,0 +1,2 @@
|
|||
ColumnLimit: 120
|
||||
IndentWidth: 4
|
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
acaesar
|
20
README.md
Normal file
20
README.md
Normal file
|
@ -0,0 +1,20 @@
|
|||
# ACaesar - Caesar cipher transcoder
|
||||
|
||||
This program encodes and decodes messages using a [Caesar Cipher](https://en.wikipedia.org/wiki/Caesar_cipher).
|
||||
|
||||
```
|
||||
Usage: acaesar [options] message
|
||||
Options:
|
||||
--help Print this help message
|
||||
-o <offset> Set the integer offset used in transcoding
|
||||
-c <character set> Set the character set
|
||||
--xyz Set the character set to the lowercase alphabet (default)
|
||||
```
|
||||
|
||||
## Compiling
|
||||
|
||||
The program is fairly simple, so it should compile with any modern C compiler. It was developed using GCC on Ubuntu. A makefile is included; it contains a single command:
|
||||
|
||||
```
|
||||
gcc acaesar.c utf8hack.c -O3 -Wall -Wextra -o acaesar
|
||||
```
|
160
acaesar.c
Executable file
160
acaesar.c
Executable file
|
@ -0,0 +1,160 @@
|
|||
|
||||
#include "utf8hack.h"
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
int get_index_of_utf8_char(utf8_char *utf8_string, size_t string_length, utf8_char search_char);
|
||||
|
||||
void transcode(long offset, char *charset_str, char *message_str);
|
||||
|
||||
// Usage text printed by main() when the `--help` flag is given.
// It is a single string literal continued across lines with backslash-newline.
#define HELP_MESSAGE \
|
||||
"ACaesar - Caesar Cipher transcoder version 0.1\n\
|
||||
Usage: acaesar [options] message\n\
|
||||
Options:\n\
|
||||
--help Print this help message\n\
|
||||
-o <offset> Set the integer offset used in transcoding\n\
|
||||
-c <character set> Set the character set\n\
|
||||
--xyz Set the character set to the lowercase alphabet (default)\n"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
|
||||
bool charset_set = false;
|
||||
bool message_set = false;
|
||||
bool offset_set = false;
|
||||
bool alloc_charset = false;
|
||||
long offset;
|
||||
char *charset;
|
||||
char *message;
|
||||
|
||||
// Argument parsing
|
||||
for (int i = 1; i < argc; i++) {
|
||||
|
||||
char *current_arg = argv[i];
|
||||
// Help flag
|
||||
if (strcmp(current_arg, "--help") == 0) {
|
||||
printf(HELP_MESSAGE);
|
||||
return 0;
|
||||
}
|
||||
// Charset flag
|
||||
if (strcmp(current_arg, "-c") == 0) {
|
||||
if (i == argc - 1) {
|
||||
fprintf(stderr, "Error, charset (-c) flag found without following charset.\n");
|
||||
return 1;
|
||||
}
|
||||
if (charset_set) {
|
||||
fprintf(stderr, "Error, character set specified twice.\n");
|
||||
return 1;
|
||||
}
|
||||
char *next_arg = argv[i + 1];
|
||||
size_t size = strlen(next_arg);
|
||||
charset = malloc(size * sizeof(char) + 1);
|
||||
strcpy(charset, next_arg);
|
||||
charset_set = true;
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
// Offset flag
|
||||
if (strcmp(current_arg, "-o") == 0) {
|
||||
if (i == argc - 1) {
|
||||
fprintf(stderr, "Error, offset (-o) flag found without following offset.\n");
|
||||
return 1;
|
||||
}
|
||||
if (offset_set) {
|
||||
fprintf(stderr, "Error, offset (-o) specified twice.\n");
|
||||
return 1;
|
||||
}
|
||||
char *next_arg = argv[i + 1];
|
||||
offset = strtol(next_arg, NULL, 10);
|
||||
offset_set = true;
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
// alphabetical charset flag. (default anyways). Only exists for completeness.
|
||||
if (strcmp(current_arg, "--xyz") == 0) {
|
||||
if (charset_set) {
|
||||
fprintf(stderr, "Error, character set specified twice.\n");
|
||||
return 1;
|
||||
}
|
||||
charset = "abcdefghijklmnopqrstuvwxyz";
|
||||
charset_set = true;
|
||||
alloc_charset = true;
|
||||
continue;
|
||||
}
|
||||
if (i != argc - 1) {
|
||||
// Unrecognized flag
|
||||
fprintf(stderr, "Error, unknown argument found: '%s'\n", current_arg);
|
||||
return 1;
|
||||
} else {
|
||||
// Message to be transcoded.
|
||||
// Last argument is assumed to be message.
|
||||
size_t size = strlen(current_arg);
|
||||
message = malloc(size * sizeof(char) + 1);
|
||||
strcpy(message, current_arg);
|
||||
message_set = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!charset_set) {
|
||||
// Default to a...z when no charset is found.
|
||||
charset = "abcdefghijklmnopqrstuvwxyz";
|
||||
charset_set = true;
|
||||
}
|
||||
if (!offset_set) {
|
||||
fprintf(stderr, "Error, no offset found. Use '-o' to set one.\n");
|
||||
return 1;
|
||||
}
|
||||
if (!message_set) {
|
||||
fprintf(stderr, "Error, no message found to transcode.\n");
|
||||
return 1;
|
||||
}
|
||||
transcode(offset, charset, message);
|
||||
|
||||
// Clean up
|
||||
free(message);
|
||||
// In some cases, charset is malloc'd and in others it is set to be a string literal.
|
||||
// String literals can not bee freed so `alloc_charset` is used to determine when charset should be freed
|
||||
if (alloc_charset)
|
||||
free(charset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Linear search for `search_char` among the first `string_length` entries of `utf8_string`.
 *
 * Returns the zero-based position of the first entry that compares equal
 * (per utf8_char_eq), or -1 when there is no such entry.
 */
int get_index_of_utf8_char(utf8_char *utf8_string, size_t string_length, utf8_char search_char) {
    size_t pos = 0;
    while (pos < string_length) {
        if (utf8_char_eq(utf8_string + pos, &search_char)) {
            return pos;
        }
        pos++;
    }
    return -1;
}
|
||||
|
||||
// Error handling would be nice for this function.
|
||||
void transcode(long offset, char *charset_str, char *message_str) {
|
||||
size_t charset_length;
|
||||
utf8_char *charset;
|
||||
parse_to_utf8(charset_str, &charset, &charset_length);
|
||||
size_t message_length;
|
||||
utf8_char *message;
|
||||
parse_to_utf8(message_str, &message, &message_length);
|
||||
|
||||
utf8_char *transcoded_message = malloc(message_length * sizeof(utf8_char));
|
||||
|
||||
for (size_t i = 0; i < message_length; i++) {
|
||||
// A hashmap would likely be more efficient for larger messages, but would introduce much complexity.
|
||||
int a = get_index_of_utf8_char(charset, charset_length, message[i]);
|
||||
if (a != -1) {
|
||||
transcoded_message[i] = copy_utf8_char(&charset[(a + offset) % charset_length]);
|
||||
} else {
|
||||
// If character is not in character set, copy it over unchanged.
|
||||
transcoded_message[i] = copy_utf8_char(&message[i]);
|
||||
}
|
||||
}
|
||||
|
||||
print_utf8_str(transcoded_message, message_length);
|
||||
printf("\n");
|
||||
|
||||
free_utf8_str(charset, charset_length);
|
||||
free_utf8_str(message, message_length);
|
||||
free_utf8_str(transcoded_message, message_length);
|
||||
}
|
2
makefile
Normal file
2
makefile
Normal file
|
@ -0,0 +1,2 @@
|
|||
build:
|
||||
gcc acaesar.c utf8hack.c -O3 -Wall -Wextra -o acaesar
|
146
utf8hack.c
Normal file
146
utf8hack.c
Normal file
|
@ -0,0 +1,146 @@
|
|||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
/*
 * One UTF-8 encoded character.
 * This definition mirrors the one in utf8hack.h and must stay identical to it.
 */
struct utf8_char {
    char *bytes;   // the encoded bytes; no NUL terminator is stored
    size_t length; // number of bytes held in `bytes`
};
typedef struct utf8_char utf8_char;
|
||||
|
||||
/*
 * Classification of a single byte of UTF-8 input.
 *
 * The numeric values are significant: for the head_* kinds the value equals the
 * total number of bytes in the encoded character, and parse_to_utf8() uses it as
 * a byte count. Do not reorder the enumerators.
 */
typedef enum {
    single_ASCII_byte = 0,
    trailing_byte = 1,
    head_double_byte = 2,
    head_triple_byte = 3,
    head_quad_byte = 4,
    invalid_utf8_byte = 5,
} utf8_byte_type;
|
||||
|
||||
utf8_byte_type get_utf8_byte_type(unsigned char input) {
|
||||
|
||||
if ((input & 0b10000000) == 0b10000000) {
|
||||
|
||||
if ((input & 0b11000000) == 0b10000000) {
|
||||
return trailing_byte;
|
||||
}
|
||||
if ((input & 0b11100000) == 0b11000000) {
|
||||
return head_double_byte;
|
||||
}
|
||||
if ((input & 0b11110000) == 0b11100000) {
|
||||
return head_triple_byte;
|
||||
}
|
||||
if ((input & 0b11111000) == 0b11110000) {
|
||||
return head_quad_byte;
|
||||
}
|
||||
return invalid_utf8_byte;
|
||||
}
|
||||
return single_ASCII_byte;
|
||||
}
|
||||
|
||||
/**
 * Appends one character to the array `*utf8_str`, which currently holds `*utf8_str_length` entries.
 *
 * The new entry stores the `byte_buffer` pointer itself (no copy is made) together with
 * `buf_length`, and `*utf8_str_length` is incremented by one.
 * No capacity check is performed; the caller must have allocated room for the new entry.
 */
void push_byte_buffer_to_utf8_str(utf8_char **utf8_str, size_t *utf8_str_length, char *byte_buffer, int buf_length) {
    utf8_char *slot = *utf8_str + *utf8_str_length;

    slot->bytes = byte_buffer;
    slot->length = buf_length;

    *utf8_str_length += 1;
}
|
||||
|
||||
/**
 * Releases a utf8_char array: frees the byte buffer of each of the first `length`
 * entries, then frees the array itself.
 */
void free_utf8_str(utf8_char *str, int length) {
    utf8_char *cursor = str;
    int remaining = length;

    while (remaining > 0) {
        free(cursor->bytes);
        cursor++;
        remaining--;
    }
    free(str);
}
|
||||
|
||||
/**
 * Splits the NUL-terminated byte string `input` into individual UTF-8 characters.
 *
 * On success returns 0, stores a newly allocated array in `*output_string` and its element
 * count in `*output_length`; the caller releases it with free_utf8_str().
 *
 * On failure returns 1, frees everything allocated so far, and leaves `*output_string` set
 * to NULL and `*output_length` set to 0. Failure cases are: a trailing byte with no head
 * byte, an invalid head byte, a head byte not followed by the required number of trailing
 * bytes (including a sequence cut short by the end of the string), or an allocation failure.
 *
 * Only the byte structure is checked; overlong encodings and out-of-range code points
 * are not rejected.
 */
int parse_to_utf8(char *input, utf8_char **output_string, size_t *output_length) {
    const size_t input_str_length = strlen(input);
    size_t utf8_str_len = 0;

    *output_string = NULL;
    *output_length = 0;

    // One slot per input byte is an upper bound on the number of characters.
    // Always request at least one slot so empty input does not become malloc(0).
    utf8_char *output_str = malloc((input_str_length > 0 ? input_str_length : 1) * sizeof *output_str);
    if (output_str == NULL) {
        goto fail;
    }

    size_t i = 0;
    while (i < input_str_length) {
        const utf8_byte_type byte_type = get_utf8_byte_type((unsigned char)input[i]);
        size_t char_length;

        switch (byte_type) {
        case single_ASCII_byte:
            char_length = 1;
            break;
        case head_double_byte:
        case head_triple_byte:
        case head_quad_byte:
            // The enum value of a head byte equals the length of its sequence in bytes.
            char_length = (size_t)byte_type;
            break;
        case trailing_byte: // a trailing byte without a head byte is invalid
        case invalid_utf8_byte:
        default:
            goto fail;
        }

        // Every byte after the head must be a trailing byte. The terminating NUL is not a
        // trailing byte, so a truncated sequence fails here before anything past the
        // terminator is read.
        for (size_t k = 1; k < char_length; k++) {
            if (get_utf8_byte_type((unsigned char)input[i + k]) != trailing_byte) {
                goto fail;
            }
        }

        char *byte_buffer = malloc(char_length);
        if (byte_buffer == NULL) {
            goto fail;
        }
        memcpy(byte_buffer, input + i, char_length);
        push_byte_buffer_to_utf8_str(&output_str, &utf8_str_len, byte_buffer, (int)char_length);
        i += char_length;
    }

    if (utf8_str_len > 0 && utf8_str_len < input_str_length) {
        // Shrink the array to the number of characters actually stored. realloc takes a size
        // in bytes, and a failed shrink is harmless: the larger array is still valid.
        utf8_char *shrunk = realloc(output_str, utf8_str_len * sizeof *output_str);
        if (shrunk != NULL) {
            output_str = shrunk;
        }
    }

    *output_length = utf8_str_len;
    *output_string = output_str;
    return 0;

fail:
    free_utf8_str(output_str, (int)utf8_str_len);
    return 1;
}
|
||||
|
||||
/**
 * Tests two characters for equality.
 *
 * Returns true when both have the same length and every byte matches, false otherwise.
 * Two zero-length characters compare equal without their buffers being read.
 */
bool utf8_char_eq(utf8_char *a, utf8_char *b) {
    const size_t n = a->length;

    if (n != b->length) {
        return false;
    }
    return n == 0 || memcmp(a->bytes, b->bytes, n) == 0;
}
|
||||
|
||||
/**
 * Writes the bytes of the first `length` characters of `str` to stdout, in order.
 * No separator or trailing newline is added.
 */
void print_utf8_str(utf8_char *str, size_t length) {
    for (size_t i = 0; i < length; i++) {
        fwrite(str[i].bytes, sizeof(char), str[i].length, stdout);
    }
}
|
||||
|
||||
/**
 * Returns a copy of `*from` whose bytes live in a freshly malloc'd buffer.
 * The result does not share storage with `from`; its `bytes` must be freed separately.
 */
utf8_char copy_utf8_char(utf8_char *from) {
    const size_t n = from->length;
    utf8_char duplicate = {.bytes = malloc(sizeof(char) * n), .length = n};

    if (n > 0) {
        memcpy(duplicate.bytes, from->bytes, n);
    }
    return duplicate;
}
|
19
utf8hack.h
Normal file
19
utf8hack.h
Normal file
|
@ -0,0 +1,19 @@
|
|||
#ifndef UTF8_HACK
|
||||
#define UTF8_HACK
|
||||
// hacky implementation of UTF-8 parsing. Doesn't do much validation.
|
||||
// Primary purpose is to separate bytes into unicode codepoints.
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/* One UTF-8 encoded character. */
struct utf8_char {
    char *bytes;   // the encoded bytes; no NUL terminator is stored
    size_t length; // number of bytes held in `bytes`
};
typedef struct utf8_char utf8_char;
|
||||
|
||||
/** Returns true when `a` and `b` have the same length and identical bytes. */
bool utf8_char_eq(utf8_char *a, utf8_char *b);
|
||||
/**
 * Splits the NUL-terminated string `input` into UTF-8 characters.
 * Returns 0 on success, storing an allocated array in `*output_string` and its element
 * count in `*output_length`. Returns 1 when the input is rejected; the outputs must
 * not be relied upon in that case.
 */
int parse_to_utf8(char *input, utf8_char **output_string, size_t *output_length);
|
||||
/** Writes the bytes of the first `length` characters of `str` to stdout. */
void print_utf8_str(utf8_char *str, size_t length);
|
||||
/** Frees the byte buffer of each of the first `length` entries of `str`, then `str` itself. */
void free_utf8_str(utf8_char *str, int length);
|
||||
/** Returns a copy of `*from` with its own newly allocated byte buffer. */
utf8_char copy_utf8_char(utf8_char *from);
|
||||
|
||||
#endif // UTF8_HACK
|
Loading…
Reference in a new issue