[ruby/yarp] Switch from handling const char * to const uint8_t *

https://github.com/ruby/yarp/commit/465e7bb0a9
2023-08-29 10:48:20 -04:00 · 2023-08-29 10:48:20 -04:00 · 7be08f3f58
commit 7be08f3f58
parent eac3da173a
39 changed files with 504 additions and 655 deletions
--- a/yarp/defines.h
+++ b/yarp/defines.h
@ -6,6 +6,7 @@
 #include <ctype.h>
 #include <stdarg.h>
 #include <stddef.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <string.h>

@ -39,6 +40,6 @@
 #   define snprintf _snprintf
 #endif

-int yp_strncasecmp(const char *string1, const char *string2, size_t length);
+int yp_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length);

 #endif
--- a/yarp/diagnostic.c
+++ b/yarp/diagnostic.c
@ -2,7 +2,7 @@

 // Append an error to the given list of diagnostics.
 bool
-yp_diagnostic_list_append(yp_list_t *list, const char *start, const char *end, const char *message) {
+yp_diagnostic_list_append(yp_list_t *list, const uint8_t *start, const uint8_t *end, const char *message) {
    yp_diagnostic_t *diagnostic = (yp_diagnostic_t *) malloc(sizeof(yp_diagnostic_t));
    if (diagnostic == NULL) return false;

--- a/yarp/diagnostic.h
+++ b/yarp/diagnostic.h
@ -10,13 +10,13 @@
 // This struct represents a diagnostic found during parsing.
 typedef struct {
    yp_list_node_t node;
-    const char *start;
-    const char *end;
+    const uint8_t *start;
+    const uint8_t *end;
    const char *message;
 } yp_diagnostic_t;

 // Append a diagnostic to the given list of diagnostics.
-bool yp_diagnostic_list_append(yp_list_t *list, const char *start, const char *end, const char *message);
+bool yp_diagnostic_list_append(yp_list_t *list, const uint8_t *start, const uint8_t *end, const char *message);

 // Deallocate the internal state of the given diagnostic list.
 void yp_diagnostic_list_free(yp_list_t *list);
--- a/yarp/enc/yp_big5.c
+++ b/yarp/enc/yp_big5.c
@ -1,69 +1,42 @@
 #include "yarp/enc/yp_encoding.h"

-typedef uint16_t yp_big5_codepoint_t;
-
-static yp_big5_codepoint_t
-yp_big5_codepoint(const char *c, ptrdiff_t n, size_t *width) {
-    const unsigned char *uc = (const unsigned char *) c;
-
+static size_t
+yp_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
    // These are the single byte characters.
-    if (*uc < 0x80) {
-        *width = 1;
-        return *uc;
+    if (*b < 0x80) {
+        return 1;
    }

    // These are the double byte characters.
-    if ((n > 1) && (uc[0] >= 0xA1 && uc[0] <= 0xFE) && (uc[1] >= 0x40 && uc[1] <= 0xFE)) {
-        *width = 2;
-        return (yp_big5_codepoint_t) (uc[0] << 8 | uc[1]);
+    if ((n > 1) && (b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xFE)) {
+        return 2;
    }

-    *width = 0;
    return 0;
 }

 static size_t
-yp_encoding_big5_char_width(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_big5_codepoint(c, n, &width);
-
-    return width;
-}
-
-static size_t
-yp_encoding_big5_alpha_char(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
-
-    if (width == 1) {
-        const char value = (const char) codepoint;
-        return yp_encoding_ascii_alpha_char(&value, n);
+yp_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
+    if (yp_encoding_big5_char_width(b, n) == 1) {
+        return yp_encoding_ascii_alpha_char(b, n);
    } else {
        return 0;
    }
 }

 static size_t
-yp_encoding_big5_alnum_char(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
-
-    if (width == 1) {
-        const char value = (const char) codepoint;
-        return yp_encoding_ascii_alnum_char(&value, n);
+yp_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
+    if (yp_encoding_big5_char_width(b, n) == 1) {
+        return yp_encoding_ascii_alnum_char(b, n);
    } else {
        return 0;
    }
 }

 static bool
-yp_encoding_big5_isupper_char(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
-
-    if (width == 1) {
-        const char value = (const char) codepoint;
-        return yp_encoding_ascii_isupper_char(&value, n);
+yp_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    if (yp_encoding_big5_char_width(b, n) == 1) {
+        return yp_encoding_ascii_isupper_char(b, n);
    } else {
        return false;
    }
--- a/yarp/enc/yp_encoding.h
+++ b/yarp/enc/yp_encoding.h
@ -16,22 +16,22 @@ typedef struct {
    // Return the number of bytes that the next character takes if it is valid
    // in the encoding. Does not read more than n bytes. It is assumed that n is
    // at least 1.
-    size_t (*char_width)(const char *c, ptrdiff_t n);
+    size_t (*char_width)(const uint8_t *b, ptrdiff_t n);

    // Return the number of bytes that the next character takes if it is valid
    // in the encoding and is alphabetical. Does not read more than n bytes. It
    // is assumed that n is at least 1.
-    size_t (*alpha_char)(const char *c, ptrdiff_t n);
+    size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);

    // Return the number of bytes that the next character takes if it is valid
    // in the encoding and is alphanumeric. Does not read more than n bytes. It
    // is assumed that n is at least 1.
-    size_t (*alnum_char)(const char *c, ptrdiff_t n);
+    size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);

    // Return true if the next character is valid in the encoding and is an
    // uppercase character. Does not read more than n bytes. It is assumed that
    // n is at least 1.
-    bool (*isupper_char)(const char *c, ptrdiff_t n);
+    bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);

    // The name of the encoding. This should correspond to a value that can be
    // passed to Encoding.find in Ruby.
@ -49,18 +49,18 @@ typedef struct {

 // These functions are reused by some other encodings, so they are defined here
 // so they can be shared.
-size_t yp_encoding_ascii_alpha_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
-size_t yp_encoding_ascii_alnum_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
-bool yp_encoding_ascii_isupper_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
+size_t yp_encoding_ascii_alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
+size_t yp_encoding_ascii_alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
+bool yp_encoding_ascii_isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n);

 // These functions are shared between the actual encoding and the fast path in
 // the parser so they need to be internally visible.
-size_t yp_encoding_utf_8_alpha_char(const char *c, ptrdiff_t n);
-size_t yp_encoding_utf_8_alnum_char(const char *c, ptrdiff_t n);
+size_t yp_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
+size_t yp_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);

 // This lookup table is referenced in both the UTF-8 encoding file and the
 // parser directly in order to speed up the default encoding processing.
-extern unsigned char yp_encoding_unicode_table[256];
+extern uint8_t yp_encoding_unicode_table[256];

 // These are the encodings that are supported by the parser. They are defined in
 // their own files in the src/enc directory.
--- a/yarp/enc/yp_euc_jp.c
+++ b/yarp/enc/yp_euc_jp.c
@ -1,75 +1,48 @@
 #include "yarp/enc/yp_encoding.h"

-typedef uint16_t yp_euc_jp_codepoint_t;
-
-static yp_euc_jp_codepoint_t
-yp_euc_jp_codepoint(const char *c, ptrdiff_t n, size_t *width) {
-    const unsigned char *uc = (const unsigned char *) c;
-
+static size_t
+yp_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
    // These are the single byte characters.
-    if (*uc < 0x80) {
-        *width = 1;
-        return *uc;
+    if (*b < 0x80) {
+        return 1;
    }

    // These are the double byte characters.
    if (
        (n > 1) &&
        (
-            ((uc[0] == 0x8E) && (uc[1] >= 0xA1 && uc[1] <= 0xFE)) ||
-            ((uc[0] >= 0xA1 && uc[0] <= 0xFE) && (uc[1] >= 0xA1 && uc[1] <= 0xFE))
+            ((b[0] == 0x8E) && (b[1] >= 0xA1 && b[1] <= 0xFE)) ||
+            ((b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0xA1 && b[1] <= 0xFE))
        )
    ) {
-        *width = 2;
-        return (yp_euc_jp_codepoint_t) (uc[0] << 8 | uc[1]);
+        return 2;
    }

-    *width = 0;
    return 0;
 }

 static size_t
-yp_encoding_euc_jp_char_width(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_euc_jp_codepoint(c, n, &width);
-
-    return width;
-}
-
-static size_t
-yp_encoding_euc_jp_alpha_char(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
-
-    if (width == 1) {
-        const char value = (const char) codepoint;
-        return yp_encoding_ascii_alpha_char(&value, n);
+yp_encoding_euc_jp_alpha_char(const uint8_t *b, ptrdiff_t n) {
+    if (yp_encoding_euc_jp_char_width(b, n) == 1) {
+        return yp_encoding_ascii_alpha_char(b, n);
    } else {
        return 0;
    }
 }

 static size_t
-yp_encoding_euc_jp_alnum_char(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
-
-    if (width == 1) {
-        const char value = (const char) codepoint;
-        return yp_encoding_ascii_alnum_char(&value, n);
+yp_encoding_euc_jp_alnum_char(const uint8_t *b, ptrdiff_t n) {
+    if (yp_encoding_euc_jp_char_width(b, n) == 1) {
+        return yp_encoding_ascii_alnum_char(b, n);
    } else {
        return 0;
    }
 }

 static bool
-yp_encoding_euc_jp_isupper_char(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
-
-    if (width == 1) {
-        const char value = (const char) codepoint;
-        return yp_encoding_ascii_isupper_char(&value, n);
+yp_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    if (yp_encoding_euc_jp_char_width(b, n) == 1) {
+        return yp_encoding_ascii_isupper_char(b, n);
    } else {
        return 0;
    }
--- a/yarp/enc/yp_gbk.c
+++ b/yarp/enc/yp_gbk.c
@ -1,78 +1,51 @@
 #include "yarp/enc/yp_encoding.h"

-typedef uint16_t yp_gbk_codepoint_t;
-
-static yp_gbk_codepoint_t
-yp_gbk_codepoint(const char *c, ptrdiff_t n, size_t *width) {
-    const unsigned char *uc = (const unsigned char *) c;
-
+static size_t
+yp_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
    // These are the single byte characters.
-    if (*uc < 0x80) {
-        *width = 1;
-        return *uc;
+    if (*b < 0x80) {
+        return 1;
    }

    // These are the double byte characters.
    if (
        (n > 1) &&
        (
-            ((uc[0] >= 0xA1 && uc[0] <= 0xA9) && (uc[1] >= 0xA1 && uc[1] <= 0xFE)) || // GBK/1
-            ((uc[0] >= 0xB0 && uc[0] <= 0xF7) && (uc[1] >= 0xA1 && uc[1] <= 0xFE)) || // GBK/2
-            ((uc[0] >= 0x81 && uc[0] <= 0xA0) && (uc[1] >= 0x40 && uc[1] <= 0xFE) && (uc[1] != 0x7F)) || // GBK/3
-            ((uc[0] >= 0xAA && uc[0] <= 0xFE) && (uc[1] >= 0x40 && uc[1] <= 0xA0) && (uc[1] != 0x7F)) || // GBK/4
-            ((uc[0] >= 0xA8 && uc[0] <= 0xA9) && (uc[1] >= 0x40 && uc[1] <= 0xA0) && (uc[1] != 0x7F)) // GBK/5
+            ((b[0] >= 0xA1 && b[0] <= 0xA9) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/1
+            ((b[0] >= 0xB0 && b[0] <= 0xF7) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/2
+            ((b[0] >= 0x81 && b[0] <= 0xA0) && (b[1] >= 0x40 && b[1] <= 0xFE) && (b[1] != 0x7F)) || // GBK/3
+            ((b[0] >= 0xAA && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/4
+            ((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // GBK/5
        )
    ) {
-        *width = 2;
-        return (yp_gbk_codepoint_t) (uc[0] << 8 | uc[1]);
+        return 2;
    }

-    *width = 0;
    return 0;
 }

 static size_t
-yp_encoding_gbk_char_width(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_gbk_codepoint(c, n, &width);
-
-    return width;
-}
-
-static size_t
-yp_encoding_gbk_alpha_char(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
-
-    if (width == 1) {
-        const char value = (const char) codepoint;
-        return yp_encoding_ascii_alpha_char(&value, n);
+yp_encoding_gbk_alpha_char(const uint8_t *b, ptrdiff_t n) {
+    if (yp_encoding_gbk_char_width(b, n) == 1) {
+        return yp_encoding_ascii_alpha_char(b, n);
    } else {
        return 0;
    }
 }

 static size_t
-yp_encoding_gbk_alnum_char(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
-
-    if (width == 1) {
-        const char value = (const char) codepoint;
-        return yp_encoding_ascii_alnum_char(&value, n);
+yp_encoding_gbk_alnum_char(const uint8_t *b, ptrdiff_t n) {
+    if (yp_encoding_gbk_char_width(b, n) == 1) {
+        return yp_encoding_ascii_alnum_char(b, n);
    } else {
        return 0;
    }
 }

 static bool
-yp_encoding_gbk_isupper_char(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
-
-    if (width == 1) {
-        const char value = (const char) codepoint;
-        return yp_encoding_ascii_isupper_char(&value, n);
+yp_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    if (yp_encoding_gbk_char_width(b, n) == 1) {
+        return yp_encoding_ascii_isupper_char(b, n);
    } else {
        return false;
    }
--- a/yarp/enc/yp_shift_jis.c
+++ b/yarp/enc/yp_shift_jis.c
@ -1,73 +1,46 @@
 #include "yarp/enc/yp_encoding.h"

-typedef uint16_t yp_shift_jis_codepoint_t;
-
-static yp_shift_jis_codepoint_t
-yp_shift_jis_codepoint(const char *c, ptrdiff_t n, size_t *width) {
-    const unsigned char *uc = (const unsigned char *) c;
-
+static size_t
+yp_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
    // These are the single byte characters.
-    if (*uc < 0x80 || (*uc >= 0xA1 && *uc <= 0xDF)) {
-        *width = 1;
-        return *uc;
+    if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
+        return 1;
    }

    // These are the double byte characters.
    if (
        (n > 1) &&
-        ((uc[0] >= 0x81 && uc[0] <= 0x9F) || (uc[0] >= 0xE0 && uc[0] <= 0xFC)) &&
-        (uc[1] >= 0x40 && uc[1] <= 0xFC)
+        ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
+        (b[1] >= 0x40 && b[1] <= 0xFC)
    ) {
-        *width = 2;
-        return (yp_shift_jis_codepoint_t) (uc[0] << 8 | uc[1]);
+        return 2;
    }

-    *width = 0;
    return 0;
 }

 static size_t
-yp_encoding_shift_jis_char_width(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_shift_jis_codepoint(c, n, &width);
-
-    return width;
-}
-
-static size_t
-yp_encoding_shift_jis_alpha_char(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
-
-    if (width == 1) {
-        const char value = (const char) codepoint;
-        return yp_encoding_ascii_alpha_char(&value, n);
+yp_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
+    if (yp_encoding_shift_jis_char_width(b, n) == 1) {
+        return yp_encoding_ascii_alpha_char(b, n);
    } else {
        return 0;
    }
 }

 static size_t
-yp_encoding_shift_jis_alnum_char(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
-
-    if (width == 1) {
-        const char value = (const char) codepoint;
-        return yp_encoding_ascii_alnum_char(&value, n);
+yp_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
+    if (yp_encoding_shift_jis_char_width(b, n) == 1) {
+        return yp_encoding_ascii_alnum_char(b, n);
    } else {
        return 0;
    }
 }

 static bool
-yp_encoding_shift_jis_isupper_char(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
-
-    if (width == 1) {
-        const char value = (const char) codepoint;
-        return yp_encoding_ascii_isupper_char(&value, n);
+yp_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    if (yp_encoding_shift_jis_char_width(b, n) == 1) {
+        return yp_encoding_ascii_isupper_char(b, n);
    } else {
        return 0;
    }
--- a/yarp/enc/yp_tables.c
+++ b/yarp/enc/yp_tables.c
@ -2,7 +2,7 @@

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding ASCII character.
-static unsigned char yp_encoding_ascii_table[256] = {
+static uint8_t yp_encoding_ascii_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -24,7 +24,7 @@ static unsigned char yp_encoding_ascii_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding ISO-8859-1 character.
-static unsigned char yp_encoding_iso_8859_1_table[256] = {
+static uint8_t yp_encoding_iso_8859_1_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -46,7 +46,7 @@ static unsigned char yp_encoding_iso_8859_1_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding ISO-8859-2 character.
-static unsigned char yp_encoding_iso_8859_2_table[256] = {
+static uint8_t yp_encoding_iso_8859_2_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -68,7 +68,7 @@ static unsigned char yp_encoding_iso_8859_2_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding ISO-8859-3 character.
-static unsigned char yp_encoding_iso_8859_3_table[256] = {
+static uint8_t yp_encoding_iso_8859_3_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -90,7 +90,7 @@ static unsigned char yp_encoding_iso_8859_3_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding ISO-8859-4 character.
-static unsigned char yp_encoding_iso_8859_4_table[256] = {
+static uint8_t yp_encoding_iso_8859_4_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -112,7 +112,7 @@ static unsigned char yp_encoding_iso_8859_4_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding ISO-8859-5 character.
-static unsigned char yp_encoding_iso_8859_5_table[256] = {
+static uint8_t yp_encoding_iso_8859_5_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -134,7 +134,7 @@ static unsigned char yp_encoding_iso_8859_5_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding ISO-8859-6 character.
-static unsigned char yp_encoding_iso_8859_6_table[256] = {
+static uint8_t yp_encoding_iso_8859_6_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -156,7 +156,7 @@ static unsigned char yp_encoding_iso_8859_6_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding ISO-8859-7 character.
-static unsigned char yp_encoding_iso_8859_7_table[256] = {
+static uint8_t yp_encoding_iso_8859_7_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -178,7 +178,7 @@ static unsigned char yp_encoding_iso_8859_7_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding ISO-8859-8 character.
-static unsigned char yp_encoding_iso_8859_8_table[256] = {
+static uint8_t yp_encoding_iso_8859_8_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -200,7 +200,7 @@ static unsigned char yp_encoding_iso_8859_8_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding ISO-8859-9 character.
-static unsigned char yp_encoding_iso_8859_9_table[256] = {
+static uint8_t yp_encoding_iso_8859_9_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -222,7 +222,7 @@ static unsigned char yp_encoding_iso_8859_9_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding ISO-8859-10 character.
-static unsigned char yp_encoding_iso_8859_10_table[256] = {
+static uint8_t yp_encoding_iso_8859_10_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -244,7 +244,7 @@ static unsigned char yp_encoding_iso_8859_10_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding ISO-8859-11 character.
-static unsigned char yp_encoding_iso_8859_11_table[256] = {
+static uint8_t yp_encoding_iso_8859_11_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -266,7 +266,7 @@ static unsigned char yp_encoding_iso_8859_11_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding ISO-8859-13 character.
-static unsigned char yp_encoding_iso_8859_13_table[256] = {
+static uint8_t yp_encoding_iso_8859_13_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -288,7 +288,7 @@ static unsigned char yp_encoding_iso_8859_13_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding ISO-8859-14 character.
-static unsigned char yp_encoding_iso_8859_14_table[256] = {
+static uint8_t yp_encoding_iso_8859_14_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -310,7 +310,7 @@ static unsigned char yp_encoding_iso_8859_14_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding ISO-8859-15 character.
-static unsigned char yp_encoding_iso_8859_15_table[256] = {
+static uint8_t yp_encoding_iso_8859_15_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -332,7 +332,7 @@ static unsigned char yp_encoding_iso_8859_15_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding ISO-8859-16 character.
-static unsigned char yp_encoding_iso_8859_16_table[256] = {
+static uint8_t yp_encoding_iso_8859_16_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -354,7 +354,7 @@ static unsigned char yp_encoding_iso_8859_16_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding KOI8-R character.
-static unsigned char yp_encoding_koi8_r_table[256] = {
+static uint8_t yp_encoding_koi8_r_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -376,7 +376,7 @@ static unsigned char yp_encoding_koi8_r_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding windows-1251 character.
-static unsigned char yp_encoding_windows_1251_table[256] = {
+static uint8_t yp_encoding_windows_1251_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -398,7 +398,7 @@ static unsigned char yp_encoding_windows_1251_table[256] = {

 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding windows-1252 character.
-static unsigned char yp_encoding_windows_1252_table[256] = {
+static uint8_t yp_encoding_windows_1252_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -419,34 +419,32 @@ static unsigned char yp_encoding_windows_1252_table[256] = {
 };

 static size_t
-yp_encoding_ascii_char_width(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
-    const unsigned char v = (const unsigned char) *c;
-    return v < 0x80 ? 1 : 0;
+yp_encoding_ascii_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
+    return *b < 0x80 ? 1 : 0;
 }

 size_t
-yp_encoding_ascii_alpha_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
-    return (yp_encoding_ascii_table[(const unsigned char) *c] & YP_ENCODING_ALPHABETIC_BIT);
+yp_encoding_ascii_alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
+    return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHABETIC_BIT);
 }

 size_t
-yp_encoding_ascii_alnum_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
-    return (yp_encoding_ascii_table[(const unsigned char) *c] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
+yp_encoding_ascii_alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
+    return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
 }

 bool
-yp_encoding_ascii_isupper_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
-    return (yp_encoding_ascii_table[(const unsigned char) *c] & YP_ENCODING_UPPERCASE_BIT);
+yp_encoding_ascii_isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
+    return (yp_encoding_ascii_table[*b] & YP_ENCODING_UPPERCASE_BIT);
 }

 static size_t
-yp_encoding_koi8_r_char_width(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
-    const unsigned char v = (const unsigned char) *c;
-    return ((v >= 0x20 && v <= 0x7E) || (v >= 0x80)) ? 1 : 0;
+yp_encoding_koi8_r_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
+    return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
 }

 static size_t
-yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
+yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
    return 1;
 }

@ -469,14 +467,14 @@ yp_encoding_t yp_encoding_ascii_8bit = {
 };

 #define YP_ENCODING_TABLE(s, i, w) \
-    static size_t yp_encoding_ ##i ## _alpha_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {           \
-        return (yp_encoding_ ##i ## _table[(const unsigned char) *c] & YP_ENCODING_ALPHABETIC_BIT);           \
+    static size_t yp_encoding_ ##i ## _alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {           \
+        return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHABETIC_BIT);           \
    }                                                                                                         \
-    static size_t yp_encoding_ ##i ## _alnum_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {           \
-        return (yp_encoding_ ##i ## _table[(const unsigned char) *c] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
+    static size_t yp_encoding_ ##i ## _alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {           \
+        return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
    }                                                                                                         \
-    static bool yp_encoding_ ##i ## _isupper_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {           \
-        return (yp_encoding_ ##i ## _table[(const unsigned char) *c] & YP_ENCODING_UPPERCASE_BIT);            \
+    static bool yp_encoding_ ##i ## _isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {           \
+        return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_UPPERCASE_BIT);            \
    }                                                                                                         \
    yp_encoding_t yp_encoding_ ##i = {                                                                        \
        .name = s,                                                                                            \
--- a/yarp/enc/yp_unicode.c
+++ b/yarp/enc/yp_unicode.c
@ -10,7 +10,7 @@ typedef uint32_t yp_unicode_codepoint_t;
 // this table is different from other encodings where we used a lookup table
 // because the indices of those tables are the byte representations, not the
 // codepoints themselves.
-unsigned char yp_encoding_unicode_table[256] = {
+uint8_t yp_encoding_unicode_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -2220,7 +2220,7 @@ static const uint8_t yp_utf_8_dfa[] = {
 };

 static yp_unicode_codepoint_t
-yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
+yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
    assert(n >= 1);
    size_t maximum = (size_t) n;

@ -2228,7 +2228,7 @@ yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
    uint32_t state = 0;

    for (size_t index = 0; index < 4 && index < maximum; index++) {
-        uint32_t byte = c[index];
+        uint32_t byte = b[index];
        uint32_t type = yp_utf_8_dfa[byte];

        codepoint = (state != 0) ?
@ -2247,60 +2247,55 @@ yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
 }

 static size_t
-yp_encoding_utf_8_char_width(const char *c, ptrdiff_t n) {
+yp_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
    size_t width;
-    const unsigned char *v = (const unsigned char *) c;
-
-    yp_utf_8_codepoint(v, n, &width);
+    yp_utf_8_codepoint(b, n, &width);
    return width;
 }

 size_t
-yp_encoding_utf_8_alpha_char(const char *c, ptrdiff_t n) {
-    const unsigned char *v = (const unsigned char *) c;
-    if (*v < 0x80) {
-        return (yp_encoding_unicode_table[*v] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
+yp_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
+    if (*b < 0x80) {
+        return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
    }

    size_t width;
-    yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(v, n, &width);
+    yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);

    if (codepoint <= 0xFF) {
-        return (yp_encoding_unicode_table[(unsigned char) codepoint] & YP_ENCODING_ALPHABETIC_BIT) ? width : 0;
+        return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_ALPHABETIC_BIT) ? width : 0;
    } else {
        return yp_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
    }
 }

 size_t
-yp_encoding_utf_8_alnum_char(const char *c, ptrdiff_t n) {
-    const unsigned char *v = (const unsigned char *) c;
-    if (*v < 0x80) {
-        return (yp_encoding_unicode_table[*v] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
+yp_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
+    if (*b < 0x80) {
+        return (yp_encoding_unicode_table[*b] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
    }

    size_t width;
-    yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(v, n, &width);
+    yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);

    if (codepoint <= 0xFF) {
-        return (yp_encoding_unicode_table[(unsigned char) codepoint] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
+        return (yp_encoding_unicode_table[(uint8_t) codepoint] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
    } else {
        return yp_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
    }
 }

 static bool
-yp_encoding_utf_8_isupper_char(const char *c, ptrdiff_t n) {
-    const unsigned char *v = (const unsigned char *) c;
-    if (*v < 0x80) {
-        return (yp_encoding_unicode_table[*v] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
+yp_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    if (*b < 0x80) {
+        return (yp_encoding_unicode_table[*b] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
    }

    size_t width;
-    yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(v, n, &width);
+    yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);

    if (codepoint <= 0xFF) {
-        return (yp_encoding_unicode_table[(unsigned char) codepoint] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
+        return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
    } else {
        return yp_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
    }
--- a/yarp/enc/yp_windows_31j.c
+++ b/yarp/enc/yp_windows_31j.c
@ -1,73 +1,46 @@
 #include "yarp/enc/yp_encoding.h"

-typedef uint16_t yp_windows_31j_codepoint_t;
-
-static yp_windows_31j_codepoint_t
-yp_windows_31j_codepoint(const char *c, ptrdiff_t n, size_t *width) {
-    const unsigned char *uc = (const unsigned char *) c;
-
+static size_t
+yp_encoding_windows_31j_char_width(const uint8_t *b, ptrdiff_t n) {
    // These are the single byte characters.
-    if (*uc < 0x80 || (*uc >= 0xA1 && *uc <= 0xDF)) {
-        *width = 1;
-        return *uc;
+    if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
+        return 1;
    }

    // These are the double byte characters.
    if (
        (n > 1) &&
-        ((uc[0] >= 0x81 && uc[0] <= 0x9F) || (uc[0] >= 0xE0 && uc[0] <= 0xFC)) &&
-        (uc[1] >= 0x40 && uc[1] <= 0xFC)
+        ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
+        (b[1] >= 0x40 && b[1] <= 0xFC)
    ) {
-        *width = 2;
-        return (yp_windows_31j_codepoint_t) (uc[0] << 8 | uc[1]);
+        return 2;
    }

-    *width = 0;
    return 0;
 }

 static size_t
-yp_encoding_windows_31j_char_width(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_windows_31j_codepoint(c, n, &width);
-
-    return width;
-}
-
-static size_t
-yp_encoding_windows_31j_alpha_char(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_windows_31j_codepoint_t codepoint = yp_windows_31j_codepoint(c, n, &width);
-
-    if (width == 1) {
-        const char value = (const char) codepoint;
-        return yp_encoding_ascii_alpha_char(&value, n);
+yp_encoding_windows_31j_alpha_char(const uint8_t *b, ptrdiff_t n) {
+    if (yp_encoding_windows_31j_char_width(b, n) == 1) {
+        return yp_encoding_ascii_alpha_char(b, n);
    } else {
        return 0;
    }
 }

 static size_t
-yp_encoding_windows_31j_alnum_char(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_windows_31j_codepoint_t codepoint = yp_windows_31j_codepoint(c, n, &width);
-
-    if (width == 1) {
-        const char value = (const char) codepoint;
-        return yp_encoding_ascii_alnum_char(&value, n);
+yp_encoding_windows_31j_alnum_char(const uint8_t *b, ptrdiff_t n) {
+    if (yp_encoding_windows_31j_char_width(b, n) == 1) {
+        return yp_encoding_ascii_alnum_char(b, n);
    } else {
        return 0;
    }
 }

 static bool
-yp_encoding_windows_31j_isupper_char(const char *c, ptrdiff_t n) {
-    size_t width;
-    yp_windows_31j_codepoint_t codepoint = yp_windows_31j_codepoint(c, n, &width);
-
-    if (width == 1) {
-        const char value = (const char) codepoint;
-        return yp_encoding_ascii_isupper_char(&value, n);
+yp_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    if (yp_encoding_windows_31j_char_width(b, n) == 1) {
+        return yp_encoding_ascii_isupper_char(b, n);
    } else {
        return false;
    }
--- a/yarp/extension.c
+++ b/yarp/extension.c
@ -260,7 +260,7 @@ parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
    yp_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);

    VALUE offsets = rb_ary_new();
-    VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets };
+    VALUE source_argv[] = { rb_str_new((const char *) yp_string_source(input), yp_string_length(input)), offsets };
    VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);

    parse_lex_data_t parse_lex_data = {
@ -442,7 +442,7 @@ named_captures(VALUE self, VALUE source) {
    yp_string_list_t string_list;
    yp_string_list_init(&string_list);

-    if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &yp_encoding_utf_8)) {
+    if (!yp_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &yp_encoding_utf_8)) {
        yp_string_list_free(&string_list);
        return Qnil;
    }
@ -450,7 +450,7 @@ named_captures(VALUE self, VALUE source) {
    VALUE names = rb_ary_new();
    for (size_t index = 0; index < string_list.length; index++) {
        const yp_string_t *string = &string_list.strings[index];
-        rb_ary_push(names, rb_str_new(yp_string_source(string), yp_string_length(string)));
+        rb_ary_push(names, rb_str_new((const char *) yp_string_source(string), yp_string_length(string)));
    }

    yp_string_list_free(&string_list);
@ -463,8 +463,8 @@ static VALUE
 unescape(VALUE source, yp_unescape_type_t unescape_type) {
    yp_string_t result;

-    if (yp_unescape_string(RSTRING_PTR(source), RSTRING_LEN(source), unescape_type, &result)) {
-        VALUE str = rb_str_new(yp_string_source(&result), yp_string_length(&result));
+    if (yp_unescape_string((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), unescape_type, &result)) {
+        VALUE str = rb_str_new((const char *) yp_string_source(&result), yp_string_length(&result));
        yp_string_free(&result);
        return str;
    } else {
@ -498,7 +498,7 @@ static VALUE
 memsize(VALUE self, VALUE string) {
    yp_parser_t parser;
    size_t length = RSTRING_LEN(string);
-    yp_parser_init(&parser, RSTRING_PTR(string), length, NULL);
+    yp_parser_init(&parser, (const uint8_t *) RSTRING_PTR(string), length, NULL);

    yp_node_t *node = yp_parse(&parser);
    yp_memsize_t memsize;
--- a/yarp/parser.h
+++ b/yarp/parser.h
@ -109,14 +109,14 @@ typedef struct yp_lex_mode {

            // When lexing a list, it takes into account balancing the
            // terminator if the terminator is one of (), [], {}, or <>.
-            char incrementor;
+            uint8_t incrementor;

            // This is the terminator of the list literal.
-            char terminator;
+            uint8_t terminator;

            // This is the character set that should be used to delimit the
            // tokens within the list.
-            char breakpoints[11];
+            uint8_t breakpoints[11];
        } list;

        struct {
@ -125,14 +125,14 @@ typedef struct yp_lex_mode {

            // When lexing a regular expression, it takes into account balancing
            // the terminator if the terminator is one of (), [], {}, or <>.
-            char incrementor;
+            uint8_t incrementor;

            // This is the terminator of the regular expression.
-            char terminator;
+            uint8_t terminator;

            // This is the character set that should be used to delimit the
            // tokens within the regular expression.
-            char breakpoints[6];
+            uint8_t breakpoints[6];
        } regexp;

        struct {
@ -149,21 +149,21 @@ typedef struct yp_lex_mode {

            // When lexing a string, it takes into account balancing the
            // terminator if the terminator is one of (), [], {}, or <>.
-            char incrementor;
+            uint8_t incrementor;

            // This is the terminator of the string. It is typically either a
            // single or double quote.
-            char terminator;
+            uint8_t terminator;

            // This is the character set that should be used to delimit the
            // tokens within the string.
-            char breakpoints[6];
+            uint8_t breakpoints[6];
        } string;

        struct {
            // These pointers point to the beginning and end of the heredoc
            // identifier.
-            const char *ident_start;
+            const uint8_t *ident_start;
            size_t ident_length;

            yp_heredoc_quote_t quote;
@ -171,7 +171,7 @@ typedef struct yp_lex_mode {

            // This is the pointer to the character where lexing should resume
            // once the heredoc has been completely processed.
-            const char *next_start;
+            const uint8_t *next_start;
        } heredoc;
    } as;

@ -239,8 +239,8 @@ typedef enum {
 // This is a node in the linked list of comments that we've found while parsing.
 typedef struct yp_comment {
    yp_list_node_t node;
-    const char *start;
-    const char *end;
+    const uint8_t *start;
+    const uint8_t *end;
    yp_comment_type_t type;
 } yp_comment_t;

@ -252,7 +252,7 @@ typedef void (*yp_encoding_changed_callback_t)(yp_parser_t *parser);
 // the ability here to call out to a user-defined function to get an encoding
 // struct. If the function returns something that isn't NULL, we set that to
 // our encoding and use it to parse identifiers.
-typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const char *name, size_t width);
+typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const uint8_t *name, size_t width);

 // When you are lexing through a file, the lexer needs all of the information
 // that the parser additionally provides (for example, the local table). So if
@ -316,21 +316,21 @@ struct yp_parser {
        size_t index;                           // the current index into the lexer mode stack
    } lex_modes;

-    const char *start;   // the pointer to the start of the source
-    const char *end;     // the pointer to the end of the source
+    const uint8_t *start;   // the pointer to the start of the source
+    const uint8_t *end;     // the pointer to the end of the source
    yp_token_t previous; // the previous token we were considering
    yp_token_t current;  // the current token we're considering

    // This is a special field set on the parser when we need the parser to jump
    // to a specific location when lexing the next token, as opposed to just
    // using the end of the previous token. Normally this is NULL.
-    const char *next_start;
+    const uint8_t *next_start;

    // This field indicates the end of a heredoc whose identifier was found on
    // the current line. If another heredoc is found on the same line, then this
    // will be moved forward to the end of that heredoc. If no heredocs are
    // found on a line then this is NULL.
-    const char *heredoc_end;
+    const uint8_t *heredoc_end;

    yp_list_t comment_list;             // the list of comments that have been found while parsing
    yp_list_t warning_list;             // the list of warnings that have been found while parsing
@ -361,7 +361,7 @@ struct yp_parser {

    // This pointer indicates where a comment must start if it is to be
    // considered an encoding comment.
-    const char *encoding_comment_start;
+    const uint8_t *encoding_comment_start;

    // This is an optional callback that can be attached to the parser that will
    // be called whenever a new token is lexed by the parser.
--- a/yarp/regexp.c
+++ b/yarp/regexp.c
@ -2,9 +2,9 @@

 // This is the parser that is going to handle parsing regular expressions.
 typedef struct {
-    const char *start;
-    const char *cursor;
-    const char *end;
+    const uint8_t *start;
+    const uint8_t *cursor;
+    const uint8_t *end;
    yp_string_list_t *named_captures;
    bool encoding_changed;
    yp_encoding_t *encoding;
@ -12,7 +12,7 @@ typedef struct {

 // This initializes a new parser with the given source.
 static void
-yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char *end, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
+yp_regexp_parser_init(yp_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
    *parser = (yp_regexp_parser_t) {
        .start = start,
        .cursor = start,
@ -25,7 +25,7 @@ yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char

 // This appends a new string to the list of named captures.
 static void
-yp_regexp_parser_named_capture(yp_regexp_parser_t *parser, const char *start, const char *end) {
+yp_regexp_parser_named_capture(yp_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
    yp_string_t string;
    yp_string_shared_init(&string, start, end);
    yp_string_list_append(parser->named_captures, &string);
@ -40,7 +40,7 @@ yp_regexp_char_is_eof(yp_regexp_parser_t *parser) {

 // Optionally accept a char and consume it if it exists.
 static inline bool
-yp_regexp_char_accept(yp_regexp_parser_t *parser, char value) {
+yp_regexp_char_accept(yp_regexp_parser_t *parser, uint8_t value) {
    if (!yp_regexp_char_is_eof(parser) && *parser->cursor == value) {
        parser->cursor++;
        return true;
@ -50,7 +50,7 @@ yp_regexp_char_accept(yp_regexp_parser_t *parser, char value) {

 // Expect a character to be present and consume it.
 static inline bool
-yp_regexp_char_expect(yp_regexp_parser_t *parser, char value) {
+yp_regexp_char_expect(yp_regexp_parser_t *parser, uint8_t value) {
    if (!yp_regexp_char_is_eof(parser) && *parser->cursor == value) {
        parser->cursor++;
        return true;
@ -60,12 +60,12 @@ yp_regexp_char_expect(yp_regexp_parser_t *parser, char value) {

 // This advances the current token to the next instance of the given character.
 static bool
-yp_regexp_char_find(yp_regexp_parser_t *parser, char value) {
+yp_regexp_char_find(yp_regexp_parser_t *parser, uint8_t value) {
    if (yp_regexp_char_is_eof(parser)) {
        return false;
    }

-    const char *end = (const char *) yp_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);
+    const uint8_t *end = (const uint8_t *) yp_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);
    if (end == NULL) {
        return false;
    }
@ -107,7 +107,7 @@ yp_regexp_char_find(yp_regexp_parser_t *parser, char value) {
 // consumed so we're in the start state.
 static bool
 yp_regexp_parse_range_quantifier(yp_regexp_parser_t *parser) {
-    const char *savepoint = parser->cursor;
+    const uint8_t *savepoint = parser->cursor;

    enum {
        YP_REGEXP_RANGE_QUANTIFIER_STATE_START,
@ -252,7 +252,7 @@ yp_regexp_parse_character_set(yp_regexp_parser_t *parser) {
 // A left bracket can either mean a POSIX class or a character set.
 static bool
 yp_regexp_parse_lbracket(yp_regexp_parser_t *parser) {
-    const char *reset = parser->cursor;
+    const uint8_t *reset = parser->cursor;

    if ((parser->cursor + 2 < parser->end) && parser->cursor[0] == '[' && parser->cursor[1] == ':') {
        parser->cursor++;
@ -287,7 +287,7 @@ typedef enum {

 // This is the set of options that are configurable on the regular expression.
 typedef struct {
-    unsigned char values[YP_REGEXP_OPTION_STATE_SLOTS];
+    uint8_t values[YP_REGEXP_OPTION_STATE_SLOTS];
 } yp_regexp_options_t;

 // Initialize a new set of options to their default values.
@ -305,9 +305,9 @@ yp_regexp_options_init(yp_regexp_options_t *options) {
 // Attempt to add the given option to the set of options. Returns true if it was
 // added, false if it was already present.
 static bool
-yp_regexp_options_add(yp_regexp_options_t *options, unsigned char key) {
+yp_regexp_options_add(yp_regexp_options_t *options, uint8_t key) {
    if (key >= YP_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
-        key = (unsigned char) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
+        key = (uint8_t) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);

        switch (options->values[key]) {
            case YP_REGEXP_OPTION_STATE_INVALID:
@ -328,9 +328,9 @@ yp_regexp_options_add(yp_regexp_options_t *options, unsigned char key) {
 // Attempt to remove the given option from the set of options. Returns true if
 // it was removed, false if it was already absent.
 static bool
-yp_regexp_options_remove(yp_regexp_options_t *options, unsigned char key) {
+yp_regexp_options_remove(yp_regexp_options_t *options, uint8_t key) {
    if (key >= YP_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
-        key = (unsigned char) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
+        key = (uint8_t) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);

        switch (options->values[key]) {
            case YP_REGEXP_OPTION_STATE_INVALID:
@ -431,7 +431,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
                        parser->cursor++;
                        break;
                    default: { // named capture group
-                        const char *start = parser->cursor;
+                        const uint8_t *start = parser->cursor;
                        if (!yp_regexp_char_find(parser, '>')) {
                            return false;
                        }
@ -441,7 +441,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
                }
                break;
            case '\'': { // named capture group
-                const char *start = ++parser->cursor;
+                const uint8_t *start = ++parser->cursor;
                if (!yp_regexp_char_find(parser, '\'')) {
                    return false;
                }
@ -456,7 +456,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
                break;
            case 'i': case 'm': case 'x': case 'd': case 'a': case 'u': // options
                while (!yp_regexp_char_is_eof(parser) && *parser->cursor != '-' && *parser->cursor != ':' && *parser->cursor != ')') {
-                    if (!yp_regexp_options_add(&options, (unsigned char) *parser->cursor)) {
+                    if (!yp_regexp_options_add(&options, *parser->cursor)) {
                        return false;
                    }
                    parser->cursor++;
@ -474,7 +474,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
            case '-':
                parser->cursor++;
                while (!yp_regexp_char_is_eof(parser) && *parser->cursor != ':' && *parser->cursor != ')') {
-                    if (!yp_regexp_options_remove(&options, (unsigned char) *parser->cursor)) {
+                    if (!yp_regexp_options_remove(&options, *parser->cursor)) {
                        return false;
                    }
                    parser->cursor++;
@ -573,7 +573,7 @@ yp_regexp_parse_pattern(yp_regexp_parser_t *parser) {
 // Parse a regular expression and extract the names of all of the named capture
 // groups.
 YP_EXPORTED_FUNCTION bool
-yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
+yp_regexp_named_capture_group_names(const uint8_t *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
    yp_regexp_parser_t parser;
    yp_regexp_parser_init(&parser, source, source + size, named_captures, encoding_changed, encoding);
    return yp_regexp_parse_pattern(&parser);
--- a/yarp/regexp.h
+++ b/yarp/regexp.h
@ -14,6 +14,6 @@

 // Parse a regular expression and extract the names of all of the named capture
 // groups.
-YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding);
+YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const uint8_t *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding);

 #endif
--- a/yarp/templates/ext/yarp/api_node.c.erb
+++ b/yarp/templates/ext/yarp/api_node.c.erb
@ -12,7 +12,7 @@ static VALUE rb_cYARP<%= node.name %>;
 <%- end -%>

 static VALUE
-yp_location_new(yp_parser_t *parser, const char *start, const char *end, VALUE source) {
+yp_location_new(yp_parser_t *parser, const uint8_t *start, const uint8_t *end, VALUE source) {
    VALUE argv[] = { source, LONG2FIX(start - parser->start), LONG2FIX(end - start) };
    return rb_class_new_instance(3, argv, rb_cYARPLocation);
 }
@ -24,7 +24,7 @@ yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALU

    VALUE argv[] = {
        ID2SYM(type),
-        rb_enc_str_new(token->start, token->end - token->start, encoding),
+        rb_enc_str_new((const char *) token->start, token->end - token->start, encoding),
        location
    };

@ -33,13 +33,13 @@ yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALU

 static VALUE
 yp_string_new(yp_string_t *string, rb_encoding *encoding) {
-    return rb_enc_str_new(yp_string_source(string), yp_string_length(string), encoding);
+    return rb_enc_str_new((const char *) yp_string_source(string), yp_string_length(string), encoding);
 }

 // Create a YARP::Source object from the given parser.
 VALUE
 yp_source_new(yp_parser_t *parser) {
-    VALUE source = rb_str_new(parser->start, parser->end - parser->start);
+    VALUE source = rb_str_new((const char *) parser->start, parser->end - parser->start);
    VALUE offsets = rb_ary_new_capa(parser->newline_list.size);

    for (size_t index = 0; index < parser->newline_list.size; index++) {
@ -85,7 +85,7 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
        yp_constant_t constant = parser->constant_pool.constants[index];

        if (constant.id != 0) {
-            constants[constant.id - 1] = rb_intern3(constant.start, constant.length, encoding);
+            constants[constant.id - 1] = rb_intern3((const char *) constant.start, constant.length, encoding);
        }
    }

--- a/yarp/templates/include/yarp/ast.h.erb
+++ b/yarp/templates/include/yarp/ast.h.erb
@ -21,15 +21,15 @@ typedef enum yp_token_type {
 // type and location information.
 typedef struct {
    yp_token_type_t type;
-    const char *start;
-    const char *end;
+    const uint8_t *start;
+    const uint8_t *end;
 } yp_token_t;

 // This represents a range of bytes in the source string to which a node or
 // token corresponds.
 typedef struct {
-    const char *start;
-    const char *end;
+    const uint8_t *start;
+    const uint8_t *end;
 } yp_location_t;

 typedef struct {
--- a/yarp/templates/src/prettyprint.c.erb
+++ b/yarp/templates/src/prettyprint.c.erb
@ -36,7 +36,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            }
            <%- when StringParam -%>
            yp_buffer_append_str(buffer, "\"", 1);
-            yp_buffer_append_str(buffer, yp_string_source(&((yp_<%= node.human %>_t *)node)-><%= param.name %>), yp_string_length(&((yp_<%= node.human %>_t *)node)-><%= param.name %>));
+            yp_buffer_append_bytes(buffer, yp_string_source(&((yp_<%= node.human %>_t *)node)-><%= param.name %>), yp_string_length(&((yp_<%= node.human %>_t *)node)-><%= param.name %>));
            yp_buffer_append_str(buffer, "\"", 1);
            <%- when NodeListParam -%>
            yp_buffer_append_str(buffer, "[", 1);
--- a/yarp/templates/src/serialize.c.erb
+++ b/yarp/templates/src/serialize.c.erb
@ -38,7 +38,7 @@ yp_serialize_string(yp_parser_t *parser, yp_string_t *string, yp_buffer_t *buffe
            uint32_t length = yp_sizet_to_u32(yp_string_length(string));
            yp_buffer_append_u8(buffer, 2);
            yp_buffer_append_u32(buffer, length);
-            yp_buffer_append_str(buffer, yp_string_source(string), length);
+            yp_buffer_append_bytes(buffer, yp_string_source(string), length);
            break;
        }
        case YP_STRING_MAPPED:
@ -234,7 +234,7 @@ serialize_token(void *data, yp_parser_t *parser, yp_token_t *token) {
 }

 YP_EXPORTED_FUNCTION void
-yp_lex_serialize(const char *source, size_t size, const char *filepath, yp_buffer_t *buffer) {
+yp_lex_serialize(const uint8_t *source, size_t size, const char *filepath, yp_buffer_t *buffer) {
    yp_parser_t parser;
    yp_parser_init(&parser, source, size, filepath);

@ -261,7 +261,7 @@ yp_lex_serialize(const char *source, size_t size, const char *filepath, yp_buffe
 // Parse and serialize both the AST and the tokens represented by the given
 // source to the given buffer.
 YP_EXPORTED_FUNCTION void
-yp_parse_lex_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *metadata) {
+yp_parse_lex_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata) {
    yp_parser_t parser;
    yp_parser_init(&parser, source, size, NULL);
    if (metadata) yp_parser_metadata(&parser, metadata);
--- a/yarp/unescape.c
+++ b/yarp/unescape.c
@ -5,9 +5,9 @@
 /******************************************************************************/

 static inline bool
-yp_char_is_hexadecimal_digits(const char *c, size_t length) {
+yp_char_is_hexadecimal_digits(const uint8_t *string, size_t length) {
    for (size_t index = 0; index < length; index++) {
-        if (!yp_char_is_hexadecimal_digit(c[index])) {
+        if (!yp_char_is_hexadecimal_digit(string[index])) {
            return false;
        }
    }
@ -18,10 +18,8 @@ yp_char_is_hexadecimal_digits(const char *c, size_t length) {
 // expensive to go through the indirection of the function pointer. Instead we
 // provide a fast path that will check if we can just return 1.
 static inline size_t
-yp_char_width(yp_parser_t *parser, const char *start, const char *end) {
-    const unsigned char *uc = (const unsigned char *) start;
-
-    if (parser->encoding_changed || (*uc >= 0x80)) {
+yp_char_width(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+    if (parser->encoding_changed || (*start >= 0x80)) {
        return parser->encoding.char_width(start, end - start);
    } else {
        return 1;
@ -33,7 +31,7 @@ yp_char_width(yp_parser_t *parser, const char *start, const char *end) {
 /******************************************************************************/

 // This is a lookup table for unescapes that only take up a single character.
-static const unsigned char unescape_chars[] = {
+static const uint8_t unescape_chars[] = {
    ['\''] = '\'',
    ['\\'] = '\\',
    ['a'] = '\a',
@ -60,9 +58,8 @@ static const bool ascii_printable_chars[] = {
 };

 static inline bool
-char_is_ascii_printable(const char c) {
-    unsigned char v = (unsigned char) c;
-    return (v < 0x80) && ascii_printable_chars[v];
+char_is_ascii_printable(const uint8_t b) {
+    return (b < 0x80) && ascii_printable_chars[b];
 }

 /******************************************************************************/
@ -72,37 +69,37 @@ char_is_ascii_printable(const char c) {
 // Scan the 1-3 digits of octal into the value. Returns the number of digits
 // scanned.
 static inline size_t
-unescape_octal(const char *backslash, unsigned char *value) {
-    *value = (unsigned char) (backslash[1] - '0');
+unescape_octal(const uint8_t *backslash, uint8_t *value) {
+    *value = (uint8_t) (backslash[1] - '0');
    if (!yp_char_is_octal_digit(backslash[2])) {
        return 2;
    }

-    *value = (unsigned char) ((*value << 3) | (backslash[2] - '0'));
+    *value = (uint8_t) ((*value << 3) | (backslash[2] - '0'));
    if (!yp_char_is_octal_digit(backslash[3])) {
        return 3;
    }

-    *value = (unsigned char) ((*value << 3) | (backslash[3] - '0'));
+    *value = (uint8_t) ((*value << 3) | (backslash[3] - '0'));
    return 4;
 }

 // Convert a hexadecimal digit into its equivalent value.
-static inline unsigned char
-unescape_hexadecimal_digit(const char value) {
-    return (unsigned char) ((value <= '9') ? (value - '0') : (value & 0x7) + 9);
+static inline uint8_t
+unescape_hexadecimal_digit(const uint8_t value) {
+    return (uint8_t) ((value <= '9') ? (value - '0') : (value & 0x7) + 9);
 }

 // Scan the 1-2 digits of hexadecimal into the value. Returns the number of
 // digits scanned.
 static inline size_t
-unescape_hexadecimal(const char *backslash, unsigned char *value) {
+unescape_hexadecimal(const uint8_t *backslash, uint8_t *value) {
    *value = unescape_hexadecimal_digit(backslash[2]);
    if (!yp_char_is_hexadecimal_digit(backslash[3])) {
        return 3;
    }

-    *value = (unsigned char) ((*value << 4) | unescape_hexadecimal_digit(backslash[3]));
+    *value = (uint8_t) ((*value << 4) | unescape_hexadecimal_digit(backslash[3]));
    return 4;
 }

@ -110,7 +107,7 @@ unescape_hexadecimal(const char *backslash, unsigned char *value) {
 // digits scanned. This function assumes that the characters have already been
 // validated.
 static inline void
-unescape_unicode(const char *string, size_t length, uint32_t *value) {
+unescape_unicode(const uint8_t *string, size_t length, uint32_t *value) {
    *value = 0;
    for (size_t index = 0; index < length; index++) {
        if (index != 0) *value <<= 4;
@ -122,27 +119,25 @@ unescape_unicode(const char *string, size_t length, uint32_t *value) {
 // 32-bit value to write. Writes the UTF-8 representation of the value to the
 // string and returns the number of bytes written.
 static inline size_t
-unescape_unicode_write(char *dest, uint32_t value, const char *start, const char *end, yp_list_t *error_list) {
-    unsigned char *bytes = (unsigned char *) dest;
-
+unescape_unicode_write(uint8_t *dest, uint32_t value, const uint8_t *start, const uint8_t *end, yp_list_t *error_list) {
    if (value <= 0x7F) {
        // 0xxxxxxx
-        bytes[0] = (unsigned char) value;
+        dest[0] = (uint8_t) value;
        return 1;
    }

    if (value <= 0x7FF) {
        // 110xxxxx 10xxxxxx
-        bytes[0] = (unsigned char) (0xC0 | (value >> 6));
-        bytes[1] = (unsigned char) (0x80 | (value & 0x3F));
+        dest[0] = (uint8_t) (0xC0 | (value >> 6));
+        dest[1] = (uint8_t) (0x80 | (value & 0x3F));
        return 2;
    }

    if (value <= 0xFFFF) {
        // 1110xxxx 10xxxxxx 10xxxxxx
-        bytes[0] = (unsigned char) (0xE0 | (value >> 12));
-        bytes[1] = (unsigned char) (0x80 | ((value >> 6) & 0x3F));
-        bytes[2] = (unsigned char) (0x80 | (value & 0x3F));
+        dest[0] = (uint8_t) (0xE0 | (value >> 12));
+        dest[1] = (uint8_t) (0x80 | ((value >> 6) & 0x3F));
+        dest[2] = (uint8_t) (0x80 | (value & 0x3F));
        return 3;
    }

@ -150,10 +145,10 @@ unescape_unicode_write(char *dest, uint32_t value, const char *start, const char
    // the input is invalid.
    if (value <= 0x10FFFF) {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-        bytes[0] = (unsigned char) (0xF0 | (value >> 18));
-        bytes[1] = (unsigned char) (0x80 | ((value >> 12) & 0x3F));
-        bytes[2] = (unsigned char) (0x80 | ((value >> 6) & 0x3F));
-        bytes[3] = (unsigned char) (0x80 | (value & 0x3F));
+        dest[0] = (uint8_t) (0xF0 | (value >> 18));
+        dest[1] = (uint8_t) (0x80 | ((value >> 12) & 0x3F));
+        dest[2] = (uint8_t) (0x80 | ((value >> 6) & 0x3F));
+        dest[3] = (uint8_t) (0x80 | (value & 0x3F));
        return 4;
    }

@ -161,9 +156,9 @@ unescape_unicode_write(char *dest, uint32_t value, const char *start, const char
    // want to just crash, so instead we'll add an error to the error list and put
    // in a replacement character instead.
    yp_diagnostic_list_append(error_list, start, end, "Invalid Unicode escape sequence.");
-    bytes[0] = 0xEF;
-    bytes[1] = 0xBF;
-    bytes[2] = 0xBD;
+    dest[0] = 0xEF;
+    dest[1] = 0xBF;
+    dest[2] = 0xBD;
    return 3;
 }

@ -175,24 +170,22 @@ typedef enum {
 } yp_unescape_flag_t;

 // Unescape a single character value based on the given flags.
-static inline unsigned char
-unescape_char(const unsigned char value, const unsigned char flags) {
-    unsigned char unescaped = value;
-
+static inline uint8_t
+unescape_char(uint8_t value, const uint8_t flags) {
    if (flags & YP_UNESCAPE_FLAG_CONTROL) {
-        unescaped &= 0x1f;
+        value &= 0x1f;
    }

    if (flags & YP_UNESCAPE_FLAG_META) {
-        unescaped |= 0x80;
+        value |= 0x80;
    }

-    return unescaped;
+    return value;
 }

 // Read a specific escape sequence into the given destination.
-static const char *
-unescape(yp_parser_t *parser, char *dest, size_t *dest_length, const char *backslash, const char *end, const unsigned char flags, bool write_to_str) {
+static const uint8_t *
+unescape(yp_parser_t *parser, uint8_t *dest, size_t *dest_length, const uint8_t *backslash, const uint8_t *end, const uint8_t flags, bool write_to_str) {
    switch (backslash[1]) {
        case 'a':
        case 'b':
@ -204,27 +197,27 @@ unescape(yp_parser_t *parser, char *dest, size_t *dest_length, const char *backs
        case 't':
        case 'v':
            if (write_to_str) {
-                dest[(*dest_length)++] = (char) unescape_char(unescape_chars[(unsigned char) backslash[1]], flags);
+                dest[(*dest_length)++] = unescape_char(unescape_chars[backslash[1]], flags);
            }
            return backslash + 2;
        // \nnn         octal bit pattern, where nnn is 1-3 octal digits ([0-7])
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9': {
-            unsigned char value;
-            const char *cursor = backslash + unescape_octal(backslash, &value);
+            uint8_t value;
+            const uint8_t *cursor = backslash + unescape_octal(backslash, &value);

            if (write_to_str) {
-                dest[(*dest_length)++] = (char) unescape_char(value, flags);
+                dest[(*dest_length)++] = unescape_char(value, flags);
            }
            return cursor;
        }
        // \xnn         hexadecimal bit pattern, where nn is 1-2 hexadecimal digits ([0-9a-fA-F])
        case 'x': {
-            unsigned char value;
-            const char *cursor = backslash + unescape_hexadecimal(backslash, &value);
+            uint8_t value;
+            const uint8_t *cursor = backslash + unescape_hexadecimal(backslash, &value);

            if (write_to_str) {
-                dest[(*dest_length)++] = (char) unescape_char(value, flags);
+                dest[(*dest_length)++] = unescape_char(value, flags);
            }
            return cursor;
        }
@ -237,14 +230,14 @@ unescape(yp_parser_t *parser, char *dest, size_t *dest_length, const char *backs
            }

            if ((backslash + 3) < end && backslash[2] == '{') {
-                const char *unicode_cursor = backslash + 3;
-                const char *extra_codepoints_start = NULL;
+                const uint8_t *unicode_cursor = backslash + 3;
+                const uint8_t *extra_codepoints_start = NULL;
                int codepoints_count = 0;

                unicode_cursor += yp_strspn_whitespace(unicode_cursor, end - unicode_cursor);

                while ((*unicode_cursor != '}') && (unicode_cursor < end)) {
-                    const char *unicode_start = unicode_cursor;
+                    const uint8_t *unicode_start = unicode_cursor;
                    size_t hexadecimal_length = yp_strspn_hexadecimal_digit(unicode_cursor, end - unicode_cursor);

                    // \u{nnnn} character literal allows only 1-6 hexadecimal digits
@ -311,7 +304,7 @@ unescape(yp_parser_t *parser, char *dest, size_t *dest_length, const char *backs
                    return unescape(parser, dest, dest_length, backslash + 2, end, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
                case '?':
                    if (write_to_str) {
-                        dest[(*dest_length)++] = (char) unescape_char(0x7f, flags);
+                        dest[(*dest_length)++] = unescape_char(0x7f, flags);
                    }
                    return backslash + 3;
                default: {
@ -321,7 +314,7 @@ unescape(yp_parser_t *parser, char *dest, size_t *dest_length, const char *backs
                    }

                    if (write_to_str) {
-                        dest[(*dest_length)++] = (char) unescape_char((const unsigned char) backslash[2], flags | YP_UNESCAPE_FLAG_CONTROL);
+                        dest[(*dest_length)++] = unescape_char(backslash[2], flags | YP_UNESCAPE_FLAG_CONTROL);
                    }
                    return backslash + 3;
                }
@ -349,7 +342,7 @@ unescape(yp_parser_t *parser, char *dest, size_t *dest_length, const char *backs
                    return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
                case '?':
                    if (write_to_str) {
-                        dest[(*dest_length)++] = (char) unescape_char(0x7f, flags);
+                        dest[(*dest_length)++] = unescape_char(0x7f, flags);
                    }
                    return backslash + 4;
                default:
@ -359,7 +352,7 @@ unescape(yp_parser_t *parser, char *dest, size_t *dest_length, const char *backs
                    }

                    if (write_to_str) {
-                        dest[(*dest_length)++] = (char) unescape_char((const unsigned char) backslash[3], flags | YP_UNESCAPE_FLAG_CONTROL);
+                        dest[(*dest_length)++] = unescape_char(backslash[3], flags | YP_UNESCAPE_FLAG_CONTROL);
                    }
                    return backslash + 4;
            }
@ -388,7 +381,7 @@ unescape(yp_parser_t *parser, char *dest, size_t *dest_length, const char *backs

            if (char_is_ascii_printable(backslash[3])) {
                if (write_to_str) {
-                    dest[(*dest_length)++] = (char) unescape_char((const unsigned char) backslash[3], flags | YP_UNESCAPE_FLAG_META);
+                    dest[(*dest_length)++] = unescape_char(backslash[3], flags | YP_UNESCAPE_FLAG_META);
                }
                return backslash + 4;
            }
@ -454,7 +447,7 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
        return;
    }

-    const char *backslash = yp_memchr(string->source, '\\', string->length, parser->encoding_changed, &parser->encoding);
+    const uint8_t *backslash = yp_memchr(string->source, '\\', string->length, parser->encoding_changed, &parser->encoding);

    if (backslash == NULL) {
        // Here there are no escapes, so we can reference the source directly.
@ -463,21 +456,21 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc

    // Here we have found an escape character, so we need to handle all escapes
    // within the string.
-    char *allocated = malloc(string->length);
+    uint8_t *allocated = malloc(string->length);
    if (allocated == NULL) {
        yp_diagnostic_list_append(&parser->error_list, string->source, string->source + string->length, "Failed to allocate memory for unescaping.");
        return;
    }

    // This is the memory address where we're putting the unescaped string.
-    char *dest = allocated;
+    uint8_t *dest = allocated;
    size_t dest_length = 0;

    // This is the current position in the source string that we're looking at.
    // It's going to move along behind the backslash so that we can copy each
    // segment of the string that doesn't contain an escape.
-    const char *cursor = string->source;
-    const char *end = string->source + string->length;
+    const uint8_t *cursor = string->source;
+    const uint8_t *end = string->source + string->length;

    // For each escape found in the source string, we will handle it and update
    // the moving cursor->backslash window.
@ -496,7 +489,7 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
        switch (backslash[1]) {
            case '\\':
            case '\'':
-                dest[dest_length++] = (char) unescape_chars[(unsigned char) backslash[1]];
+                dest[dest_length++] = unescape_chars[backslash[1]];
                cursor = backslash + 2;
                break;
            default:
@ -542,7 +535,7 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
 // actually perform any string manipulations. Instead, it calculates how long
 // the unescaped character is, and returns that value
 size_t
-yp_unescape_calculate_difference(yp_parser_t *parser, const char *backslash, yp_unescape_type_t unescape_type, bool expect_single_codepoint) {
+yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *backslash, yp_unescape_type_t unescape_type, bool expect_single_codepoint) {
    assert(unescape_type != YP_UNESCAPE_NONE);

    switch (backslash[1]) {
@ -558,11 +551,11 @@ yp_unescape_calculate_difference(yp_parser_t *parser, const char *backslash, yp_
            // handle all of the different unescapes.
            assert(unescape_type == YP_UNESCAPE_ALL);

-            unsigned char flags = YP_UNESCAPE_FLAG_NONE;
+            uint8_t flags = YP_UNESCAPE_FLAG_NONE;
            if (expect_single_codepoint)
                flags |= YP_UNESCAPE_FLAG_EXPECT_SINGLE;

-            const char *cursor = unescape(parser, NULL, 0, backslash, parser->end, flags, false);
+            const uint8_t *cursor = unescape(parser, NULL, 0, backslash, parser->end, flags, false);
            assert(cursor > backslash);

            return (size_t) (cursor - backslash);
@ -574,7 +567,7 @@ yp_unescape_calculate_difference(yp_parser_t *parser, const char *backslash, yp_
 // string, a type of unescaping, and a pointer to a result string. It returns a
 // boolean indicating whether or not the unescaping was successful.
 YP_EXPORTED_FUNCTION bool
-yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result) {
+yp_unescape_string(const uint8_t *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result) {
    yp_parser_t parser;
    yp_parser_init(&parser, start, length, NULL);

--- a/yarp/unescape.h
+++ b/yarp/unescape.h
@ -35,10 +35,10 @@ YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser, yp_

 // Accepts a source string and a type of unescaping and returns the unescaped version.
 // The caller must yp_string_free(result); after calling this function.
-YP_EXPORTED_FUNCTION bool yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result);
+YP_EXPORTED_FUNCTION bool yp_unescape_string(const uint8_t *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result);

 // Returns the number of bytes that encompass the first escape sequence in the
 // given string.
-size_t yp_unescape_calculate_difference(yp_parser_t *parser, const char *value, yp_unescape_type_t unescape_type, bool expect_single_codepoint);
+size_t yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *value, yp_unescape_type_t unescape_type, bool expect_single_codepoint);

 #endif
--- a/yarp/util/yp_buffer.c
+++ b/yarp/util/yp_buffer.c
@ -63,8 +63,13 @@ yp_buffer_append_zeroes(yp_buffer_t *buffer, size_t length) {
 // Append a string to the buffer.
 void
 yp_buffer_append_str(yp_buffer_t *buffer, const char *value, size_t length) {
-    const void *source = value;
-    yp_buffer_append(buffer, source, length);
+    yp_buffer_append(buffer, value, length);
+}
+
+// Append a list of bytes to the buffer.
+void
+yp_buffer_append_bytes(yp_buffer_t *buffer, const uint8_t *value, size_t length) {
+    yp_buffer_append(buffer, (const char *) value, length);
 }

 // Append a single byte to the buffer.
--- a/yarp/util/yp_buffer.h
+++ b/yarp/util/yp_buffer.h
@ -36,6 +36,9 @@ void yp_buffer_append_zeroes(yp_buffer_t *buffer, size_t length);
 // Append a string to the buffer.
 void yp_buffer_append_str(yp_buffer_t *buffer, const char *value, size_t length);

+// Append a list of bytes to the buffer.
+void yp_buffer_append_bytes(yp_buffer_t *buffer, const uint8_t *value, size_t length);
+
 // Append a single byte to the buffer.
 void yp_buffer_append_u8(yp_buffer_t *buffer, uint8_t value);

--- a/yarp/util/yp_char.c
+++ b/yarp/util/yp_char.c
@ -13,8 +13,8 @@
 #define YP_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
 #define YP_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)

-static const unsigned char yp_char_table[256] = {
-//0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+static const uint8_t yp_byte_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
    3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
@ -33,7 +33,7 @@ static const unsigned char yp_char_table[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
 };

-static const unsigned char yp_number_table[256] = {
+static const uint8_t yp_number_table[256] = {
    // 0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 1x
@ -54,20 +54,20 @@ static const unsigned char yp_number_table[256] = {
 };

 static inline size_t
-yp_strspn_char_kind(const char *string, ptrdiff_t length, unsigned char kind) {
+yp_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
    if (length <= 0) return 0;

    size_t size = 0;
    size_t maximum = (size_t) length;

-    while (size < maximum && (yp_char_table[(unsigned char) string[size]] & kind)) size++;
+    while (size < maximum && (yp_byte_table[string[size]] & kind)) size++;
    return size;
 }

 // Returns the number of characters at the start of the string that are
 // whitespace. Disallows searching past the given maximum number of characters.
 size_t
-yp_strspn_whitespace(const char *string, ptrdiff_t length) {
+yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
    return yp_strspn_char_kind(string, length, YP_CHAR_BIT_WHITESPACE);
 }

@ -75,13 +75,13 @@ yp_strspn_whitespace(const char *string, ptrdiff_t length) {
 // whitespace while also tracking the location of each newline. Disallows
 // searching past the given maximum number of characters.
 size_t
-yp_strspn_whitespace_newlines(const char *string, ptrdiff_t length, yp_newline_list_t *newline_list, bool stop_at_newline) {
+yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list, bool stop_at_newline) {
    if (length <= 0) return 0;

    size_t size = 0;
    size_t maximum = (size_t) length;

-    while (size < maximum && (yp_char_table[(unsigned char) string[size]] & YP_CHAR_BIT_WHITESPACE)) {
+    while (size < maximum && (yp_byte_table[string[size]] & YP_CHAR_BIT_WHITESPACE)) {
        if (string[size] == '\n') {
            if (stop_at_newline) {
                return size + 1;
@ -100,42 +100,42 @@ yp_strspn_whitespace_newlines(const char *string, ptrdiff_t length, yp_newline_l
 // Returns the number of characters at the start of the string that are inline
 // whitespace. Disallows searching past the given maximum number of characters.
 size_t
-yp_strspn_inline_whitespace(const char *string, ptrdiff_t length) {
+yp_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
    return yp_strspn_char_kind(string, length, YP_CHAR_BIT_INLINE_WHITESPACE);
 }

 // Returns the number of characters at the start of the string that are regexp
 // options. Disallows searching past the given maximum number of characters.
 size_t
-yp_strspn_regexp_option(const char *string, ptrdiff_t length) {
+yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
    return yp_strspn_char_kind(string, length, YP_CHAR_BIT_REGEXP_OPTION);
 }

 static inline bool
-yp_char_is_char_kind(const char c, unsigned char kind) {
-    return (yp_char_table[(unsigned char) c] & kind) != 0;
+yp_char_is_char_kind(const uint8_t b, uint8_t kind) {
+    return (yp_byte_table[b] & kind) != 0;
 }

 // Returns true if the given character is a whitespace character.
 bool
-yp_char_is_whitespace(const char c) {
-    return yp_char_is_char_kind(c, YP_CHAR_BIT_WHITESPACE);
+yp_char_is_whitespace(const uint8_t b) {
+    return yp_char_is_char_kind(b, YP_CHAR_BIT_WHITESPACE);
 }

 // Returns true if the given character is an inline whitespace character.
 bool
-yp_char_is_inline_whitespace(const char c) {
-    return yp_char_is_char_kind(c, YP_CHAR_BIT_INLINE_WHITESPACE);
+yp_char_is_inline_whitespace(const uint8_t b) {
+    return yp_char_is_char_kind(b, YP_CHAR_BIT_INLINE_WHITESPACE);
 }

 static inline size_t
-yp_strspn_number_kind(const char *string, ptrdiff_t length, unsigned char kind) {
+yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
    if (length <= 0) return 0;

    size_t size = 0;
    size_t maximum = (size_t) length;

-    while (size < maximum && (yp_number_table[(unsigned char) string[size]] & kind)) size++;
+    while (size < maximum && (yp_number_table[string[size]] & kind)) size++;
    return size;
 }

@ -143,7 +143,7 @@ yp_strspn_number_kind(const char *string, ptrdiff_t length, unsigned char kind)
 // digits or underscores. Disallows searching past the given maximum number of
 // characters.
 size_t
-yp_strspn_binary_number(const char *string, ptrdiff_t length) {
+yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length) {
    return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_BINARY_NUMBER);
 }

@ -151,14 +151,14 @@ yp_strspn_binary_number(const char *string, ptrdiff_t length) {
 // digits or underscores.  Disallows searching past the given maximum number of
 // characters.
 size_t
-yp_strspn_octal_number(const char *string, ptrdiff_t length) {
+yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length) {
    return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_OCTAL_NUMBER);
 }

 // Returns the number of characters at the start of the string that are decimal
 // digits. Disallows searching past the given maximum number of characters.
 size_t
-yp_strspn_decimal_digit(const char *string, ptrdiff_t length) {
+yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
    return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_DIGIT);
 }

@ -166,7 +166,7 @@ yp_strspn_decimal_digit(const char *string, ptrdiff_t length) {
 // digits or underscores. Disallows searching past the given maximum number of
 // characters.
 size_t
-yp_strspn_decimal_number(const char *string, ptrdiff_t length) {
+yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length) {
    return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_NUMBER);
 }

@ -174,7 +174,7 @@ yp_strspn_decimal_number(const char *string, ptrdiff_t length) {
 // hexadecimal digits. Disallows searching past the given maximum number of
 // characters.
 size_t
-yp_strspn_hexadecimal_digit(const char *string, ptrdiff_t length) {
+yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
    return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
 }

@ -182,37 +182,37 @@ yp_strspn_hexadecimal_digit(const char *string, ptrdiff_t length) {
 // hexadecimal digits or underscores. Disallows searching past the given maximum
 // number of characters.
 size_t
-yp_strspn_hexadecimal_number(const char *string, ptrdiff_t length) {
+yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length) {
    return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_NUMBER);
 }

 static inline bool
-yp_char_is_number_kind(const char c, unsigned char kind) {
-    return (yp_number_table[(unsigned char) c] & kind) != 0;
+yp_char_is_number_kind(const uint8_t b, uint8_t kind) {
+    return (yp_number_table[b] & kind) != 0;
 }

 // Returns true if the given character is a binary digit.
 bool
-yp_char_is_binary_digit(const char c) {
-    return yp_char_is_number_kind(c, YP_NUMBER_BIT_BINARY_DIGIT);
+yp_char_is_binary_digit(const uint8_t b) {
+    return yp_char_is_number_kind(b, YP_NUMBER_BIT_BINARY_DIGIT);
 }

 // Returns true if the given character is an octal digit.
 bool
-yp_char_is_octal_digit(const char c) {
-    return yp_char_is_number_kind(c, YP_NUMBER_BIT_OCTAL_DIGIT);
+yp_char_is_octal_digit(const uint8_t b) {
+    return yp_char_is_number_kind(b, YP_NUMBER_BIT_OCTAL_DIGIT);
 }

 // Returns true if the given character is a decimal digit.
 bool
-yp_char_is_decimal_digit(const char c) {
-    return yp_char_is_number_kind(c, YP_NUMBER_BIT_DECIMAL_DIGIT);
+yp_char_is_decimal_digit(const uint8_t b) {
+    return yp_char_is_number_kind(b, YP_NUMBER_BIT_DECIMAL_DIGIT);
 }

 // Returns true if the given character is a hexadecimal digit.
 bool
-yp_char_is_hexadecimal_digit(const char c) {
-    return yp_char_is_number_kind(c, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
+yp_char_is_hexadecimal_digit(const uint8_t b) {
+    return yp_char_is_number_kind(b, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
 }

 #undef YP_CHAR_BIT_WHITESPACE
--- a/yarp/util/yp_char.h
+++ b/yarp/util/yp_char.h
@ -9,67 +9,67 @@

 // Returns the number of characters at the start of the string that are
 // whitespace. Disallows searching past the given maximum number of characters.
-size_t yp_strspn_whitespace(const char *string, ptrdiff_t length);
+size_t yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length);

 // Returns the number of characters at the start of the string that are
 // whitespace while also tracking the location of each newline. Disallows
 // searching past the given maximum number of characters.
 size_t
-yp_strspn_whitespace_newlines(const char *string, ptrdiff_t length, yp_newline_list_t *newline_list, bool);
+yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list, bool stop_at_newline);

 // Returns the number of characters at the start of the string that are inline
 // whitespace. Disallows searching past the given maximum number of characters.
-size_t yp_strspn_inline_whitespace(const char *string, ptrdiff_t length);
+size_t yp_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length);

 // Returns the number of characters at the start of the string that are decimal
 // digits. Disallows searching past the given maximum number of characters.
-size_t yp_strspn_decimal_digit(const char *string, ptrdiff_t length);
+size_t yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);

 // Returns the number of characters at the start of the string that are
 // hexadecimal digits. Disallows searching past the given maximum number of
 // characters.
-size_t yp_strspn_hexadecimal_digit(const char *string, ptrdiff_t length);
+size_t yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);

 // Returns the number of characters at the start of the string that are octal
 // digits or underscores.  Disallows searching past the given maximum number of
 // characters.
-size_t yp_strspn_octal_number(const char *string, ptrdiff_t length);
+size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length);

 // Returns the number of characters at the start of the string that are decimal
 // digits or underscores. Disallows searching past the given maximum number of
 // characters.
-size_t yp_strspn_decimal_number(const char *string, ptrdiff_t length);
+size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length);

 // Returns the number of characters at the start of the string that are
 // hexadecimal digits or underscores. Disallows searching past the given maximum
 // number of characters.
-size_t yp_strspn_hexadecimal_number(const char *string, ptrdiff_t length);
+size_t yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length);

 // Returns the number of characters at the start of the string that are regexp
 // options. Disallows searching past the given maximum number of characters.
-size_t yp_strspn_regexp_option(const char *string, ptrdiff_t length);
+size_t yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);

 // Returns the number of characters at the start of the string that are binary
 // digits or underscores. Disallows searching past the given maximum number of
 // characters.
-size_t yp_strspn_binary_number(const char *string, ptrdiff_t length);
+size_t yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length);

 // Returns true if the given character is a whitespace character.
-bool yp_char_is_whitespace(const char c);
+bool yp_char_is_whitespace(const uint8_t b);

 // Returns true if the given character is an inline whitespace character.
-bool yp_char_is_inline_whitespace(const char c);
+bool yp_char_is_inline_whitespace(const uint8_t b);

 // Returns true if the given character is a binary digit.
-bool yp_char_is_binary_digit(const char c);
+bool yp_char_is_binary_digit(const uint8_t b);

 // Returns true if the given character is an octal digit.
-bool yp_char_is_octal_digit(const char c);
+bool yp_char_is_octal_digit(const uint8_t b);

 // Returns true if the given character is a decimal digit.
-bool yp_char_is_decimal_digit(const char c);
+bool yp_char_is_decimal_digit(const uint8_t b);

 // Returns true if the given character is a hexadecimal digit.
-bool yp_char_is_hexadecimal_digit(const char c);
+bool yp_char_is_hexadecimal_digit(const uint8_t b);

 #endif
--- a/yarp/util/yp_constant_pool.c
+++ b/yarp/util/yp_constant_pool.c
@ -48,12 +48,12 @@ yp_constant_id_list_free(yp_constant_id_list_t *list) {
 // A relatively simple hash function (djb2) that is used to hash strings. We are
 // optimizing here for simplicity and speed.
 static inline size_t
-yp_constant_pool_hash(const char *start, size_t length) {
+yp_constant_pool_hash(const uint8_t *start, size_t length) {
    // This is a prime number used as the initial value for the hash function.
    size_t value = 5381;

    for (size_t index = 0; index < length; index++) {
-        value = ((value << 5) + value) + ((unsigned char) start[index]);
+        value = ((value << 5) + value) + start[index];
    }

    return value;
@ -109,7 +109,7 @@ yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity) {
 // Insert a constant into a constant pool. Returns the id of the constant, or 0
 // if any potential calls to resize fail.
 yp_constant_id_t
-yp_constant_pool_insert(yp_constant_pool_t *pool, const char *start, size_t length) {
+yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
    if (pool->size >= (pool->capacity / 4 * 3)) {
        if (!yp_constant_pool_resize(pool)) return 0;
    }
--- a/yarp/util/yp_constant_pool.h
+++ b/yarp/util/yp_constant_pool.h
@ -40,7 +40,7 @@ void yp_constant_id_list_free(yp_constant_id_list_t *list);

 typedef struct {
    yp_constant_id_t id;
-    const char *start;
+    const uint8_t *start;
    size_t length;
    size_t hash;
 } yp_constant_t;
@ -59,7 +59,7 @@ bool yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity);

 // Insert a constant into a constant pool. Returns the id of the constant, or 0
 // if any potential calls to resize fail.
-yp_constant_id_t yp_constant_pool_insert(yp_constant_pool_t *pool, const char *start, size_t length);
+yp_constant_id_t yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t length);

 // Free the memory associated with a constant pool.
 void yp_constant_pool_free(yp_constant_pool_t *pool);
--- a/yarp/util/yp_memchr.c
+++ b/yarp/util/yp_memchr.c
@ -8,7 +8,7 @@
 void *
 yp_memchr(const void *memory, int character, size_t number, bool encoding_changed, yp_encoding_t *encoding) {
    if (encoding_changed && encoding->multibyte && character >= YP_MEMCHR_TRAILING_BYTE_MINIMUM) {
-        const char *source = (const char *) memory;
+        const uint8_t *source = (const uint8_t *) memory;
        size_t index = 0;

        while (index < number) {
--- a/yarp/util/yp_newline_list.c
+++ b/yarp/util/yp_newline_list.c
@ -3,7 +3,7 @@
 // Initialize a new newline list with the given capacity. Returns true if the
 // allocation of the offsets succeeds, otherwise returns false.
 bool
-yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity) {
+yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capacity) {
    list->offsets = (size_t *) calloc(capacity, sizeof(size_t));
    if (list->offsets == NULL) return false;

@ -23,7 +23,7 @@ yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity
 // Append a new offset to the newline list. Returns true if the reallocation of
 // the offsets succeeds (if one was necessary), otherwise returns false.
 bool
-yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
+yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor) {
    if (list->size == list->capacity) {
        list->capacity = (list->capacity * 3) / 2;
        list->offsets = (size_t *) realloc(list->offsets, list->capacity * sizeof(size_t));
@ -33,6 +33,7 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
    assert(*cursor == '\n');
    assert(cursor >= list->start);
    size_t newline_offset = (size_t) (cursor - list->start + 1);
+
    assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
    list->offsets[list->size++] = newline_offset;

@ -41,7 +42,7 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {

 // Conditionally append a new offset to the newline list, if the value passed in is a newline.
 bool
-yp_newline_list_check_append(yp_newline_list_t *list, const char *cursor) {
+yp_newline_list_check_append(yp_newline_list_t *list, const uint8_t *cursor) {
    if (*cursor != '\n') {
        return true;
    }
@ -105,7 +106,7 @@ yp_newline_list_line_column_scan(yp_newline_list_t *list, size_t offset) {
 // list, the line and column of the closest offset less than the given offset
 // are returned.
 yp_line_column_t
-yp_newline_list_line_column(yp_newline_list_t *list, const char *cursor) {
+yp_newline_list_line_column(yp_newline_list_t *list, const uint8_t *cursor) {
    assert(cursor >= list->start);
    size_t offset = (size_t) (cursor - list->start);
    yp_line_column_t result;
--- a/yarp/util/yp_newline_list.h
+++ b/yarp/util/yp_newline_list.h
@ -19,7 +19,7 @@
 // A list of offsets of newlines in a string. The offsets are assumed to be
 // sorted/inserted in ascending order.
 typedef struct {
-    const char *start;
+    const uint8_t *start;

    size_t *offsets;
    size_t size;
@ -41,19 +41,19 @@ typedef struct {

 // Initialize a new newline list with the given capacity. Returns true if the
 // allocation of the offsets succeeds, otherwise returns false.
-bool yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity);
+bool yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capacity);

 // Append a new offset to the newline list. Returns true if the reallocation of
 // the offsets succeeds (if one was necessary), otherwise returns false.
-bool yp_newline_list_append(yp_newline_list_t *list, const char *cursor);
+bool yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor);

 // Conditionally append a new offset to the newline list, if the value passed in is a newline.
-bool yp_newline_list_check_append(yp_newline_list_t *list, const char *cursor);
+bool yp_newline_list_check_append(yp_newline_list_t *list, const uint8_t *cursor);

 // Returns the line and column of the given offset. If the offset is not in the
 // list, the line and column of the closest offset less than the given offset
 // are returned.
-yp_line_column_t yp_newline_list_line_column(yp_newline_list_t *list, const char *cursor);
+yp_line_column_t yp_newline_list_line_column(yp_newline_list_t *list, const uint8_t *cursor);

 // Free the internal memory allocated for the newline list.
 void yp_newline_list_free(yp_newline_list_t *list);
--- a/yarp/util/yp_string.c
+++ b/yarp/util/yp_string.c
@ -12,18 +12,19 @@

 // Initialize a shared string that is based on initial input.
 void
-yp_string_shared_init(yp_string_t *string, const char *start, const char *end) {
+yp_string_shared_init(yp_string_t *string, const uint8_t *start, const uint8_t *end) {
    assert(start <= end);
+
    *string = (yp_string_t) {
        .type = YP_STRING_SHARED,
-        .source = (char*) start,
+        .source = start,
        .length = (size_t) (end - start)
    };
 }

 // Initialize an owned string that is responsible for freeing allocated memory.
 void
-yp_string_owned_init(yp_string_t *string, char *source, size_t length) {
+yp_string_owned_init(yp_string_t *string, uint8_t *source, size_t length) {
    *string = (yp_string_t) {
        .type = YP_STRING_OWNED,
        .source = source,
@ -36,13 +37,13 @@ void
 yp_string_constant_init(yp_string_t *string, const char *source, size_t length) {
    *string = (yp_string_t) {
        .type = YP_STRING_CONSTANT,
-        .source = (char*) source,
+        .source = (const uint8_t *) source,
        .length = length
    };
 }

 static void
-yp_string_mapped_init_internal(yp_string_t *string, char *source, size_t length) {
+yp_string_mapped_init_internal(yp_string_t *string, uint8_t *source, size_t length) {
    *string = (yp_string_t) {
        .type = YP_STRING_MAPPED,
        .source = source,
@ -67,13 +68,13 @@ yp_string_ensure_owned(yp_string_t *string) {
    if (string->type == YP_STRING_OWNED) return;

    size_t length = yp_string_length(string);
-    const char *source = yp_string_source(string);
+    const uint8_t *source = yp_string_source(string);

-    char *memory = malloc(length);
+    uint8_t *memory = malloc(length);
    if (!memory) return;

    yp_string_owned_init(string, memory, length);
-    memcpy(string->source, source, length);
+    memcpy((void *) string->source, source, length);
 }

 // Returns the length associated with the string.
@ -83,7 +84,7 @@ yp_string_length(const yp_string_t *string) {
 }

 // Returns the start pointer associated with the string.
-YP_EXPORTED_FUNCTION const char *
+YP_EXPORTED_FUNCTION const uint8_t *
 yp_string_source(const yp_string_t *string) {
    return string->source;
 }
@ -91,15 +92,16 @@ yp_string_source(const yp_string_t *string) {
 // Free the associated memory of the given string.
 YP_EXPORTED_FUNCTION void
 yp_string_free(yp_string_t *string) {
+    void *memory = (void *) string->source;
+
    if (string->type == YP_STRING_OWNED) {
-        free(string->source);
+        free(memory);
    } else if (string->type == YP_STRING_MAPPED && string->length) {
-        void *memory = (void *) string->source;
-        #if defined(_WIN32)
+#if defined(_WIN32)
        UnmapViewOfFile(memory);
-        #else
+#else
        munmap(memory, string->length);
-        #endif
+#endif
    }
 }

@ -126,8 +128,8 @@ yp_string_mapped_init(yp_string_t *string, const char *filepath) {
    // the source to a constant empty string and return.
    if (file_size == 0) {
        CloseHandle(file);
-        char empty_string[] = "";
-        yp_string_mapped_init_internal(string, empty_string, 0);
+        uint8_t empty[] = "";
+        yp_string_mapped_init_internal(string, empty, 0);
        return true;
    }

@ -140,7 +142,7 @@ yp_string_mapped_init(yp_string_t *string, const char *filepath) {
    }

    // Map the file into memory.
-    char *source = (char *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
+    uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
    CloseHandle(mapping);
    CloseHandle(file);

@ -169,12 +171,12 @@ yp_string_mapped_init(yp_string_t *string, const char *filepath) {

    // mmap the file descriptor to virtually get the contents
    size_t size = (size_t) sb.st_size;
-    char *source = NULL;
+    uint8_t *source = NULL;

    if (size == 0) {
        close(fd);
-        char empty_string[] = "";
-        yp_string_mapped_init_internal(string, empty_string, 0);
+        uint8_t empty[] = "";
+        yp_string_mapped_init_internal(string, empty, 0);
        return true;
    }

--- a/yarp/util/yp_string.h
+++ b/yarp/util/yp_string.h
@ -12,17 +12,17 @@
 // This struct represents a string value.
 typedef struct {
    enum { YP_STRING_SHARED, YP_STRING_OWNED, YP_STRING_CONSTANT, YP_STRING_MAPPED } type;
-    char *source;
+    const uint8_t *source;
    size_t length;
 } yp_string_t;

 #define YP_EMPTY_STRING ((yp_string_t) { .type = YP_STRING_CONSTANT, .source = NULL, .length = 0 })

 // Initialize a shared string that is based on initial input.
-void yp_string_shared_init(yp_string_t *string, const char *start, const char *end);
+void yp_string_shared_init(yp_string_t *string, const uint8_t *start, const uint8_t *end);

 // Initialize an owned string that is responsible for freeing allocated memory.
-void yp_string_owned_init(yp_string_t *string, char *source, size_t length);
+void yp_string_owned_init(yp_string_t *string, uint8_t *source, size_t length);

 // Initialize a constant string that doesn't own its memory source.
 void yp_string_constant_init(yp_string_t *string, const char *source, size_t length);
@ -49,7 +49,7 @@ void yp_string_ensure_owned(yp_string_t *string);
 YP_EXPORTED_FUNCTION size_t yp_string_length(const yp_string_t *string);

 // Returns the start pointer associated with the string.
-YP_EXPORTED_FUNCTION const char * yp_string_source(const yp_string_t *string);
+YP_EXPORTED_FUNCTION const uint8_t * yp_string_source(const yp_string_t *string);

 // Free the associated memory of the given string.
 YP_EXPORTED_FUNCTION void yp_string_free(yp_string_t *string);
--- a/yarp/util/yp_string_list.c
+++ b/yarp/util/yp_string_list.c
@ -1,11 +1,5 @@
 #include "yarp/util/yp_string_list.h"

-// Allocate a new yp_string_list_t.
-yp_string_list_t *
-yp_string_list_alloc(void) {
-    return (yp_string_list_t *) malloc(sizeof(yp_string_list_t));
-}
-
 // Initialize a yp_string_list_t with its default values.
 void
 yp_string_list_init(yp_string_list_t *string_list) {
--- a/yarp/util/yp_string_list.h
+++ b/yarp/util/yp_string_list.h
@ -13,9 +13,6 @@ typedef struct {
    size_t capacity;
 } yp_string_list_t;

-// Allocate a new yp_string_list_t.
-yp_string_list_t * yp_string_list_alloc(void);
-
 // Initialize a yp_string_list_t with its default values.
 YP_EXPORTED_FUNCTION void yp_string_list_init(yp_string_list_t *string_list);

--- a/yarp/util/yp_strncasecmp.c
+++ b/yarp/util/yp_strncasecmp.c
@ -1,18 +1,15 @@
 #include <ctype.h>
 #include <stddef.h>
+#include <stdint.h>

 int
-yp_strncasecmp(const char *string1, const char *string2, size_t length) {
+yp_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
    size_t offset = 0;
    int difference = 0;

    while (offset < length && string1[offset] != '\0') {
        if (string2[offset] == '\0') return string1[offset];
-
-        unsigned char left = (unsigned char) string1[offset];
-        unsigned char right = (unsigned char) string2[offset];
-
-        if ((difference = tolower(left) - tolower(right)) != 0) return difference;
+        if ((difference = tolower(string1[offset]) - tolower(string2[offset])) != 0) return difference;
        offset++;
    }

--- a/yarp/util/yp_strpbrk.c
+++ b/yarp/util/yp_strpbrk.c
@ -1,12 +1,12 @@
 #include "yarp/util/yp_strpbrk.h"

 // This is the slow path that does care about the encoding.
-static inline const char *
-yp_strpbrk_multi_byte(yp_parser_t *parser, const char *source, const char *charset, size_t maximum) {
+static inline const uint8_t *
+yp_strpbrk_multi_byte(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
    size_t index = 0;

    while (index < maximum) {
-        if (strchr(charset, source[index]) != NULL) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
            return source + index;
        }

@ -22,12 +22,12 @@ yp_strpbrk_multi_byte(yp_parser_t *parser, const char *source, const char *chars
 }

 // This is the fast path that does not care about the encoding.
-static inline const char *
-yp_strpbrk_single_byte(const char *source, const char *charset, size_t maximum) {
+static inline const uint8_t *
+yp_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) {
    size_t index = 0;

    while (index < maximum) {
-        if (strchr(charset, source[index]) != NULL) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
            return source + index;
        }

@ -54,8 +54,8 @@ yp_strpbrk_single_byte(const char *source, const char *charset, size_t maximum)
 // characters that are trailing bytes of multi-byte characters. For example, in
 // Shift-JIS, the backslash character can be a trailing byte. In that case we
 // need to take a slower path and iterate one multi-byte character at a time.
-const char *
-yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length) {
+const uint8_t *
+yp_strpbrk(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
    if (length <= 0) {
        return NULL;
    } else if (parser->encoding_changed && parser->encoding.multibyte) {
--- a/yarp/util/yp_strpbrk.h
+++ b/yarp/util/yp_strpbrk.h
@ -24,6 +24,6 @@
 // characters that are trailing bytes of multi-byte characters. For example, in
 // Shift-JIS, the backslash character can be a trailing byte. In that case we
 // need to take a slower path and iterate one multi-byte character at a time.
-const char * yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length);
+const uint8_t * yp_strpbrk(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);

 #endif
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@ -167,8 +167,8 @@ debug_token(yp_token_t * token) {

 // Returns the incrementor character that should be used to increment the
 // nesting count if one is possible.
-static inline char
-lex_mode_incrementor(const char start) {
+static inline uint8_t
+lex_mode_incrementor(const uint8_t start) {
    switch (start) {
        case '(':
        case '[':
@ -182,8 +182,8 @@ lex_mode_incrementor(const char start) {

 // Returns the matching character that should be used to terminate a list
 // beginning with the given character.
-static inline char
-lex_mode_terminator(const char start) {
+static inline uint8_t
+lex_mode_terminator(const uint8_t start) {
    switch (start) {
        case '(':
            return ')';
@ -221,9 +221,9 @@ lex_mode_push(yp_parser_t *parser, yp_lex_mode_t lex_mode) {

 // Push on a new list lex mode.
 static inline bool
-lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {
-    char incrementor = lex_mode_incrementor(delimiter);
-    char terminator = lex_mode_terminator(delimiter);
+lex_mode_push_list(yp_parser_t *parser, bool interpolation, uint8_t delimiter) {
+    uint8_t incrementor = lex_mode_incrementor(delimiter);
+    uint8_t terminator = lex_mode_terminator(delimiter);

    yp_lex_mode_t lex_mode = {
        .mode = YP_LEX_LIST,
@ -237,7 +237,7 @@ lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {

    // These are the places where we need to split up the content of the list.
    // We'll use strpbrk to find the first of these characters.
-    char *breakpoints = lex_mode.as.list.breakpoints;
+    uint8_t *breakpoints = lex_mode.as.list.breakpoints;
    memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));

    // Now we'll add the terminator to the list of breakpoints.
@ -260,7 +260,7 @@ lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {

 // Push on a new regexp lex mode.
 static inline bool
-lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {
+lex_mode_push_regexp(yp_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
    yp_lex_mode_t lex_mode = {
        .mode = YP_LEX_REGEXP,
        .as.regexp = {
@ -273,7 +273,7 @@ lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {
    // These are the places where we need to split up the content of the
    // regular expression. We'll use strpbrk to find the first of these
    // characters.
-    char *breakpoints = lex_mode.as.regexp.breakpoints;
+    uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
    memcpy(breakpoints, "\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));

    // First we'll add the terminator.
@ -289,7 +289,7 @@ lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {

 // Push on a new string lex mode.
 static inline bool
-lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed, char incrementor, char terminator) {
+lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
    yp_lex_mode_t lex_mode = {
        .mode = YP_LEX_STRING,
        .as.string = {
@ -303,7 +303,7 @@ lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed

    // These are the places where we need to split up the content of the
    // string. We'll use strpbrk to find the first of these characters.
-    char *breakpoints = lex_mode.as.string.breakpoints;
+    uint8_t *breakpoints = lex_mode.as.string.breakpoints;
    memcpy(breakpoints, "\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));

    // Now add in the terminator.
@ -423,7 +423,7 @@ debug_lex_state_set(yp_parser_t *parser, yp_lex_state_t state, char const * call

 // Retrieve the constant pool id for the given location.
 static inline yp_constant_id_t
-yp_parser_constant_id_location(yp_parser_t *parser, const char *start, const char *end) {
+yp_parser_constant_id_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
    return yp_constant_pool_insert(&parser->constant_pool, start, (size_t) (end - start));
 }

@ -615,7 +615,7 @@ yp_regular_expression_flags_create(const yp_token_t *closing) {
    yp_node_flags_t flags = 0;

    if (closing->type == YP_TOKEN_REGEXP_END) {
-        for (const char *flag = closing->start + 1; flag < closing->end; flag++) {
+        for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
            switch (*flag) {
                case 'i': flags |= YP_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
                case 'm': flags |= YP_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
@ -657,7 +657,7 @@ yp_alloc_node(YP_ATTRIBUTE_UNUSED yp_parser_t *parser, size_t size) {

 // Allocate a new MissingNode node.
 static yp_missing_node_t *
-yp_missing_node_create(yp_parser_t *parser, const char *start, const char *end) {
+yp_missing_node_create(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
    yp_missing_node_t *node = YP_ALLOC_NODE(parser, yp_missing_node_t);
    *node = (yp_missing_node_t) {{ .type = YP_NODE_MISSING_NODE, .location = { .start = start, .end = end } }};
    return node;
@ -926,7 +926,7 @@ yp_array_pattern_node_requireds_append(yp_array_pattern_node_t *node, yp_node_t
 static yp_assoc_node_t *
 yp_assoc_node_create(yp_parser_t *parser, yp_node_t *key, const yp_token_t *operator, yp_node_t *value) {
    yp_assoc_node_t *node = YP_ALLOC_NODE(parser, yp_assoc_node_t);
-    const char *end;
+    const uint8_t *end;

    if (value != NULL) {
        end = value->location.end;
@ -1110,7 +1110,7 @@ static yp_block_parameters_node_t *
 yp_block_parameters_node_create(yp_parser_t *parser, yp_parameters_node_t *parameters, const yp_token_t *opening) {
    yp_block_parameters_node_t *node = YP_ALLOC_NODE(parser, yp_block_parameters_node_t);

-    const char *start;
+    const uint8_t *start;
    if (opening->type != YP_TOKEN_NOT_PROVIDED) {
        start = opening->start;
    } else if (parameters != NULL) {
@ -1119,7 +1119,7 @@ yp_block_parameters_node_create(yp_parser_t *parser, yp_parameters_node_t *param
        start = NULL;
    }

-    const char *end;
+    const uint8_t *end;
    if (parameters != NULL) {
        end = parameters->base.location.end;
    } else if (opening->type != YP_TOKEN_NOT_PROVIDED) {
@ -1878,7 +1878,7 @@ yp_def_node_create(
    const yp_token_t *end_keyword
 ) {
    yp_def_node_t *node = YP_ALLOC_NODE(parser, yp_def_node_t);
-    const char *end;
+    const uint8_t *end;

    if (end_keyword->type == YP_TOKEN_NOT_PROVIDED) {
        end = body->location.end;
@ -1933,7 +1933,7 @@ yp_defined_node_create(yp_parser_t *parser, const yp_token_t *lparen, yp_node_t
 static yp_else_node_t *
 yp_else_node_create(yp_parser_t *parser, const yp_token_t *else_keyword, yp_statements_node_t *statements, const yp_token_t *end_keyword) {
    yp_else_node_t *node = YP_ALLOC_NODE(parser, yp_else_node_t);
-    const char *end = NULL;
+    const uint8_t *end = NULL;
    if ((end_keyword->type == YP_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
        end = statements->base.location.end;
    } else {
@ -2413,7 +2413,7 @@ yp_if_node_create(yp_parser_t *parser,
    yp_flip_flop(predicate);
    yp_if_node_t *node = YP_ALLOC_NODE(parser, yp_if_node_t);

-    const char *end;
+    const uint8_t *end;
    if (end_keyword->type != YP_TOKEN_NOT_PROVIDED) {
        end = end_keyword->end;
    } else if (consequent != NULL) {
@ -2596,7 +2596,7 @@ static yp_in_node_t *
 yp_in_node_create(yp_parser_t *parser, yp_node_t *pattern, yp_statements_node_t *statements, const yp_token_t *in_keyword, const yp_token_t *then_keyword) {
    yp_in_node_t *node = YP_ALLOC_NODE(parser, yp_in_node_t);

-    const char *end;
+    const uint8_t *end;
    if (statements != NULL) {
        end = statements->base.location.end;
    } else if (then_keyword->type != YP_TOKEN_NOT_PROVIDED) {
@ -3891,7 +3891,7 @@ yp_statements_node_body_length(yp_statements_node_t *node) {

 // Set the location of the given StatementsNode.
 static void
-yp_statements_node_location_set(yp_statements_node_t *node, const char *start, const char *end) {
+yp_statements_node_location_set(yp_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
    node->base.location = (yp_location_t) { .start = start, .end = end };
 }

@ -3957,7 +3957,7 @@ yp_super_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_argument
    assert(keyword->type == YP_TOKEN_KEYWORD_SUPER);
    yp_super_node_t *node = YP_ALLOC_NODE(parser, yp_super_node_t);

-    const char *end;
+    const uint8_t *end;
    if (arguments->block != NULL) {
        end = arguments->block->base.location.end;
    } else if (arguments->closing_loc.start != NULL) {
@ -4048,7 +4048,7 @@ yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
 // Check if the given node is a label in a hash.
 static bool
 yp_symbol_node_label_p(yp_node_t *node) {
-    const char *end = NULL;
+    const uint8_t *end = NULL;

    switch (YP_NODE_TYPE(node)) {
        case YP_NODE_SYMBOL_NODE:
@ -4156,7 +4156,7 @@ yp_unless_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t
    yp_flip_flop(predicate);
    yp_unless_node_t *node = YP_ALLOC_NODE(parser, yp_unless_node_t);

-    const char *end;
+    const uint8_t *end;
    if (statements != NULL) {
        end = statements->base.location.end;
    } else {
@ -4373,7 +4373,7 @@ static yp_yield_node_t *
 yp_yield_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_location_t *lparen_loc, yp_arguments_node_t *arguments, const yp_location_t *rparen_loc) {
    yp_yield_node_t *node = YP_ALLOC_NODE(parser, yp_yield_node_t);

-    const char *end;
+    const uint8_t *end;
    if (rparen_loc->start != NULL) {
        end = rparen_loc->end;
    } else if (arguments != NULL) {
@ -4447,7 +4447,7 @@ yp_parser_local_depth(yp_parser_t *parser, yp_token_t *token) {

 // Add a local variable from a location to the current scope.
 static yp_constant_id_t
-yp_parser_local_add_location(yp_parser_t *parser, const char *start, const char *end) {
+yp_parser_local_add_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
    yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, start, end);

    if (!yp_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
@ -4496,15 +4496,13 @@ yp_parser_scope_pop(yp_parser_t *parser) {
 // reason we have the encoding_changed boolean to check if we need to go through
 // the function pointer or can just directly use the UTF-8 functions.
 static inline size_t
-char_is_identifier_start(yp_parser_t *parser, const char *c) {
-    const unsigned char uc = (unsigned char) *c;
-
+char_is_identifier_start(yp_parser_t *parser, const uint8_t *b) {
    if (parser->encoding_changed) {
-        return parser->encoding.alpha_char(c, parser->end - c) || (uc == '_') || (uc >= 0x80);
-    } else if (uc < 0x80) {
-        return (yp_encoding_unicode_table[uc] & YP_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (uc == '_');
+        return parser->encoding.alpha_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
+    } else if (*b < 0x80) {
+        return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
    } else {
-        return (size_t) (yp_encoding_utf_8_alpha_char(c, parser->end - c) || 1u);
+        return (size_t) (yp_encoding_utf_8_alpha_char(b, parser->end - b) || 1u);
    }
 }

@ -4512,15 +4510,13 @@ char_is_identifier_start(yp_parser_t *parser, const char *c) {
 // the identifiers in a source file once the first character has been found. So
 // it's important that it be as fast as possible.
 static inline size_t
-char_is_identifier(yp_parser_t *parser, const char *c) {
-    const unsigned char uc = (unsigned char) *c;
-
+char_is_identifier(yp_parser_t *parser, const uint8_t *b) {
    if (parser->encoding_changed) {
-        return parser->encoding.alnum_char(c, parser->end - c) || (uc == '_') || (uc >= 0x80);
-    } else if (uc < 0x80) {
-        return (yp_encoding_unicode_table[uc] & YP_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (uc == '_');
+        return parser->encoding.alnum_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
+    } else if (*b < 0x80) {
+        return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (*b == '_');
    } else {
-        return (size_t) (yp_encoding_utf_8_alnum_char(c, parser->end - c) || 1u);
+        return (size_t) (yp_encoding_utf_8_alnum_char(b, parser->end - b) || 1u);
    }
 }

@ -4542,15 +4538,15 @@ const unsigned int yp_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
 #undef PUNCT

 static inline bool
-char_is_global_name_punctuation(const char c) {
-    const unsigned int i = (const unsigned int) c;
+char_is_global_name_punctuation(const uint8_t b) {
+    const unsigned int i = (const unsigned int) b;
    if (i <= 0x20 || 0x7e < i) return false;

-    return (yp_global_name_punctuation_hash[(i - 0x20) / 32] >> (c % 32)) & 1;
+    return (yp_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
 }

 static inline bool
-token_is_numbered_parameter(const char *start, const char *end) {
+token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (yp_char_is_decimal_digit(start[1]));
 }

@ -4604,8 +4600,8 @@ yp_do_loop_stack_p(yp_parser_t *parser) {

 // Get the next character in the source starting from +cursor+. If that position
 // is beyond the end of the source then return '\0'.
-static inline char
-peek_at(yp_parser_t *parser, const char *cursor) {
+static inline uint8_t
+peek_at(yp_parser_t *parser, const uint8_t *cursor) {
    if (cursor < parser->end) {
        return *cursor;
    } else {
@ -4616,33 +4612,33 @@ peek_at(yp_parser_t *parser, const char *cursor) {
 // Get the next character in the source starting from parser->current.end and
 // adding the given offset. If that position is beyond the end of the source
 // then return '\0'.
-static inline char
+static inline uint8_t
 peek_offset(yp_parser_t *parser, ptrdiff_t offset) {
    return peek_at(parser, parser->current.end + offset);
 }

 // Get the next character in the source starting from parser->current.end. If
 // that position is beyond the end of the source then return '\0'.
-static inline char
+static inline uint8_t
 peek(yp_parser_t *parser) {
    return peek_at(parser, parser->current.end);
 }

 // Get the next string of length len in the source starting from parser->current.end.
 // If the string extends beyond the end of the source, return the empty string ""
-static inline const char*
+static inline const uint8_t *
 peek_string(yp_parser_t *parser, size_t len) {
    if (parser->current.end + len <= parser->end) {
        return parser->current.end;
    } else {
-        return "";
+        return (const uint8_t *) "";
    }
 }

 // If the character to be read matches the given value, then returns true and
 // advanced the current pointer.
 static inline bool
-match(yp_parser_t *parser, char value) {
+match(yp_parser_t *parser, uint8_t value) {
    if (peek(parser) == value) {
        parser->current.end++;
        return true;
@ -4653,7 +4649,7 @@ match(yp_parser_t *parser, char value) {
 // Return the length of the line ending string starting at +cursor+, or 0 if it
 // is not a line ending. This function is intended to be CRLF/LF agnostic.
 static inline size_t
-match_eol_at(yp_parser_t *parser, const char *cursor) {
+match_eol_at(yp_parser_t *parser, const uint8_t *cursor) {
    if (peek_at(parser, cursor) == '\n') {
        return 1;
    }
@ -4680,8 +4676,8 @@ match_eol(yp_parser_t *parser) {
 }

 // Skip to the next newline character or NUL byte.
-static inline const char *
-next_newline(const char *cursor, ptrdiff_t length) {
+static inline const uint8_t *
+next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);

    // Note that it's okay for us to use memchr here to look for \n because none
@ -4692,15 +4688,15 @@ next_newline(const char *cursor, ptrdiff_t length) {

 // Find the start of the encoding comment. This is effectively an inlined
 // version of strnstr with some modifications.
-static inline const char *
-parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdiff_t remaining) {
+static inline const uint8_t *
+parser_lex_encoding_comment_start(yp_parser_t *parser, const uint8_t *cursor, ptrdiff_t remaining) {
    assert(remaining >= 0);
    size_t length = (size_t) remaining;

    size_t key_length = strlen("coding:");
    if (key_length > length) return NULL;

-    const char *cursor_limit = cursor + length - key_length + 1;
+    const uint8_t *cursor_limit = cursor + length - key_length + 1;
    while ((cursor = yp_memchr(cursor, 'c', (size_t) (cursor_limit - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
        if (memcmp(cursor, "coding", key_length - 1) == 0) {
            size_t whitespace_after_coding = yp_strspn_inline_whitespace(cursor + key_length - 1, parser->end - (cursor + key_length - 1));
@ -4721,13 +4717,13 @@ parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdi
 // actions are necessary for it here.
 static void
 parser_lex_encoding_comment(yp_parser_t *parser) {
-    const char *start = parser->current.start + 1;
-    const char *end = next_newline(start, parser->end - start);
+    const uint8_t *start = parser->current.start + 1;
+    const uint8_t *end = next_newline(start, parser->end - start);
    if (end == NULL) end = parser->end;

    // These are the patterns we're going to match to find the encoding comment.
    // This is definitely not complete or even really correct.
-    const char *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);
+    const uint8_t *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);

    // If we didn't find anything that matched our patterns, then return. Note
    // that this does a _very_ poor job of actually finding the encoding, and
@ -4740,7 +4736,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {

    // Now determine the end of the encoding string. This is either the end of
    // the line, the first whitespace character, or a punctuation mark.
-    const char *encoding_end = yp_strpbrk(parser, encoding_start, " \t\f\r\v\n;,", end - encoding_start);
+    const uint8_t *encoding_end = yp_strpbrk(parser, encoding_start, (const uint8_t *) " \t\f\r\v\n;,", end - encoding_start);
    encoding_end = encoding_end == NULL ? end : encoding_end;

    // Finally, we can determine the width of the encoding string.
@ -4762,7 +4758,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
    // Extensions like utf-8 can contain extra encoding details like,
    // utf-8-dos, utf-8-linux, utf-8-mac. We treat these all as utf-8 should
    // treat any encoding starting utf-8 as utf-8.
-    if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, "utf-8", 5) == 0)) {
+    if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, (const uint8_t *) "utf-8", 5) == 0)) {
        // We don't need to do anything here because the default encoding is
        // already UTF-8. We'll just return.
        return;
@ -4771,7 +4767,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
    // Next, we're going to loop through each of the encodings that we handle
    // explicitly. If we found one that we understand, we'll use that value.
 #define ENCODING(value, prebuilt) \
-    if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, value, width) == 0) { \
+    if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, (const uint8_t *) value, width) == 0) { \
        parser->encoding = prebuilt; \
        parser->encoding_changed |= true; \
        if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
@ -5093,7 +5089,7 @@ lex_numeric(yp_parser_t *parser) {
    if (parser->current.end < parser->end) {
        type = lex_numeric_prefix(parser);

-        const char *end = parser->current.end;
+        const uint8_t *end = parser->current.end;
        yp_token_type_t suffix_type = type;

        if (type == YP_TOKEN_INTEGER) {
@ -5118,8 +5114,8 @@ lex_numeric(yp_parser_t *parser) {
            }
        }

-        const unsigned char uc = (const unsigned char) peek(parser);
-        if (uc != '\0' && (uc >= 0x80 || ((uc >= 'a' && uc <= 'z') || (uc >= 'A' && uc <= 'Z')) || uc == '_')) {
+        const uint8_t b = peek(parser);
+        if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
            parser->current.end = end;
        } else {
            type = suffix_type;
@ -5390,7 +5386,7 @@ current_token_starts_line(yp_parser_t *parser) {
 //     this token type.
 //
 static yp_token_type_t
-lex_interpolation(yp_parser_t *parser, const char *pound) {
+lex_interpolation(yp_parser_t *parser, const uint8_t *pound) {
    // If there is no content following this #, then we're at the end of
    // the string and we can safely return string content.
    if (pound + 1 >= parser->end) {
@ -5411,7 +5407,7 @@ lex_interpolation(yp_parser_t *parser, const char *pound) {

            // If we're looking at a @ and there's another @, then we'll skip past the
            // second @.
-            const char *variable = pound + 2;
+            const uint8_t *variable = pound + 2;
            if (*variable == '@' && pound + 3 < parser->end) variable++;

            if (char_is_identifier_start(parser, variable)) {
@ -5447,7 +5443,7 @@ lex_interpolation(yp_parser_t *parser, const char *pound) {
            // This is the character that we're going to check to see if it is the
            // start of an identifier that would indicate that this is a global
            // variable.
-            const char *check = pound + 2;
+            const uint8_t *check = pound + 2;

            if (pound[2] == '-') {
                if (pound + 3 >= parser->end) {
@ -5638,7 +5634,7 @@ parser_comment(yp_parser_t *parser, yp_comment_type_t type) {
 static yp_token_type_t
 lex_embdoc(yp_parser_t *parser) {
    // First, lex out the EMBDOC_BEGIN token.
-    const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
+    const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);

    if (newline == NULL) {
        parser->current.end = parser->end;
@ -5663,7 +5659,7 @@ lex_embdoc(yp_parser_t *parser) {
        // token here.
        if (memcmp(parser->current.end, "=end", 4) == 0 &&
                (parser->current.end + 4 == parser->end || yp_char_is_whitespace(parser->current.end[4]))) {
-            const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
+            const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);

            if (newline == NULL) {
                parser->current.end = parser->end;
@ -5683,7 +5679,7 @@ lex_embdoc(yp_parser_t *parser) {

        // Otherwise, we'll parse until the end of the line and return a line of
        // embedded documentation.
-        const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
+        const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);

        if (newline == NULL) {
            parser->current.end = parser->end;
@ -5833,7 +5829,7 @@ parser_lex(yp_parser_t *parser) {
                    LEX(YP_TOKEN_EOF);

                case '#': { // comments
-                    const char *ending = next_newline(parser->current.end, parser->end - parser->current.end);
+                    const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);

                    parser->current.end = ending == NULL ? parser->end : ending + 1;
                    parser->current.type = YP_TOKEN_COMMENT;
@ -5902,7 +5898,7 @@ parser_lex(yp_parser_t *parser) {
                    // (either . or &.) that starts the next line. If there is, then this
                    // is going to become an ignored newline and we're going to instead
                    // return the call operator.
-                    const char *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
+                    const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
                    next_content += yp_strspn_inline_whitespace(next_content, parser->end - next_content);

                    if (next_content < parser->end) {
@ -5913,7 +5909,7 @@ parser_lex(yp_parser_t *parser) {
                        // Otherwise we'll return a regular newline.
                        if (next_content[0] == '#') {
                            // Here we look for a "." or "&." following a "\n".
-                            const char *following = next_newline(next_content, parser->end - next_content);
+                            const uint8_t *following = next_newline(next_content, parser->end - next_content);

                            while (following && (following + 1 < parser->end)) {
                                following++;
@ -6202,7 +6198,7 @@ parser_lex(yp_parser_t *parser) {
                            !lex_state_end_p(parser) &&
                            (!lex_state_p(parser, YP_LEX_STATE_ARG_ANY) || lex_state_p(parser, YP_LEX_STATE_LABELED) || space_seen)
                        ) {
-                            const char *end = parser->current.end;
+                            const uint8_t *end = parser->current.end;

                            yp_heredoc_quote_t quote = YP_HEREDOC_QUOTE_NONE;
                            yp_heredoc_indent_t indent = YP_HEREDOC_INDENT_NONE;
@ -6224,7 +6220,7 @@ parser_lex(yp_parser_t *parser) {
                                quote = YP_HEREDOC_QUOTE_SINGLE;
                            }

-                            const char *ident_start = parser->current.end;
+                            const uint8_t *ident_start = parser->current.end;
                            size_t width = 0;

                            if (parser->current.end >= parser->end) {
@ -6247,7 +6243,7 @@ parser_lex(yp_parser_t *parser) {
                                }

                                size_t ident_length = (size_t) (parser->current.end - ident_start);
-                                if (quote != YP_HEREDOC_QUOTE_NONE && !match(parser, (char) quote)) {
+                                if (quote != YP_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
                                    // TODO: handle unterminated heredoc
                                }

@ -6263,7 +6259,7 @@ parser_lex(yp_parser_t *parser) {
                                });

                                if (parser->heredoc_end == NULL) {
-                                    const char *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
+                                    const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);

                                    if (body_start == NULL) {
                                        // If there is no newline after the heredoc identifier, then
@ -6905,8 +6901,8 @@ parser_lex(yp_parser_t *parser) {
            // Here we'll get a list of the places where strpbrk should break,
            // and then find the first one.
            yp_lex_mode_t *lex_mode = parser->lex_modes.current;
-            const char *breakpoints = lex_mode->as.list.breakpoints;
-            const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+            const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
+            const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);

            while (breakpoint != NULL) {
                // If we hit a null byte, skip directly past it.
@ -7028,8 +7024,8 @@ parser_lex(yp_parser_t *parser) {
            // These are the places where we need to split up the content of the
            // regular expression. We'll use strpbrk to find the first of these
            // characters.
-            const char *breakpoints = lex_mode->as.regexp.breakpoints;
-            const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+            const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
+            const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);

            while (breakpoint != NULL) {
                // If we hit a null byte, skip directly past it.
@ -7162,8 +7158,8 @@ parser_lex(yp_parser_t *parser) {

            // These are the places where we need to split up the content of the
            // string. We'll use strpbrk to find the first of these characters.
-            const char *breakpoints = parser->lex_modes.current->as.string.breakpoints;
-            const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+            const uint8_t *breakpoints = parser->lex_modes.current->as.string.breakpoints;
+            const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);

            while (breakpoint != NULL) {
                // If we hit the incrementor, then we'll increment then nesting and
@ -7314,13 +7310,13 @@ parser_lex(yp_parser_t *parser) {

            // Now let's grab the information about the identifier off of the current
            // lex mode.
-            const char *ident_start = parser->lex_modes.current->as.heredoc.ident_start;
+            const uint8_t *ident_start = parser->lex_modes.current->as.heredoc.ident_start;
            size_t ident_length = parser->lex_modes.current->as.heredoc.ident_length;

            // If we are immediately following a newline and we have hit the
            // terminator, then we need to return the ending of the heredoc.
            if (current_token_starts_line(parser)) {
-                const char *start = parser->current.start;
+                const uint8_t *start = parser->current.start;
                if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) {
                    start += yp_strspn_inline_whitespace(start, parser->end - start);
                }
@ -7360,14 +7356,14 @@ parser_lex(yp_parser_t *parser) {
            // Otherwise we'll be parsing string content. These are the places where
            // we need to split up the content of the heredoc. We'll use strpbrk to
            // find the first of these characters.
-            char breakpoints[] = "\n\\#";
+            uint8_t breakpoints[] = "\n\\#";

            yp_heredoc_quote_t quote = parser->lex_modes.current->as.heredoc.quote;
            if (quote == YP_HEREDOC_QUOTE_SINGLE) {
                breakpoints[2] = '\0';
            }

-            const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+            const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);

            while (breakpoint != NULL) {
                switch (*breakpoint) {
@ -7384,7 +7380,7 @@ parser_lex(yp_parser_t *parser) {

                        yp_newline_list_append(&parser->newline_list, breakpoint);

-                        const char *start = breakpoint + 1;
+                        const uint8_t *start = breakpoint + 1;
                        if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) {
                            start += yp_strspn_inline_whitespace(start, parser->end - start);
                        }
@ -7966,10 +7962,11 @@ parse_target(yp_parser_t *parser, yp_node_t *target) {
                // the previous method name in, and append an =.
                size_t length = yp_string_length(&call->name);

-                char *name = calloc(length + 2, sizeof(char));
+                uint8_t *name = calloc(length + 1, sizeof(uint8_t));
                if (name == NULL) return NULL;

-                snprintf(name, length + 2, "%.*s=", (int) length, yp_string_source(&call->name));
+                memcpy(name, yp_string_source(&call->name), length);
+                name[length] = '=';

                // Now switch the name to the new string.
                yp_string_free(&call->name);
@ -8123,10 +8120,11 @@ parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_nod
                // the previous method name in, and append an =.
                size_t length = yp_string_length(&call->name);

-                char *name = calloc(length + 2, sizeof(char));
+                uint8_t *name = calloc(length + 1, sizeof(uint8_t));
                if (name == NULL) return NULL;

-                snprintf(name, length + 2, "%.*s=", (int) length, yp_string_source(&call->name));
+                memcpy(name, yp_string_source(&call->name), length);
+                name[length] = '=';

                // Now switch the name to the new string.
                yp_string_free(&call->name);
@ -9113,7 +9111,7 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
    // since we won't know the end until we've found all consequent
    // clauses. This sets the end location on all rescues once we know it
    if (current) {
-        const char *end_to_set = current->base.location.end;
+        const uint8_t *end_to_set = current->base.location.end;
        current = parent_node->rescue_clause;
        while (current) {
            current->base.location.end = end_to_set;
@ -9170,7 +9168,7 @@ parse_rescues_as_begin(yp_parser_t *parser, yp_statements_node_t *statements) {
    // All nodes within a begin node are optional, so we look
    // for the earliest possible node that we can use to set
    // the BeginNode's start location
-    const char * start = begin_node->base.location.start;
+    const uint8_t *start = begin_node->base.location.start;
    if (begin_node->statements) {
        start = begin_node->statements->base.location.start;
    } else if (begin_node->rescue_clause) {
@ -9845,7 +9843,7 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
        // variable.
        if (index == 0 || YP_NODE_TYPE_P(nodes->nodes[index - 1], YP_NODE_STRING_NODE)) {
            int cur_whitespace;
-            const char *cur_char = content_loc->start;
+            const uint8_t *cur_char = content_loc->start;

            while (cur_char && cur_char < content_loc->end) {
                // Any empty newlines aren't included in the minimum whitespace
@ -9936,15 +9934,15 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
        // destination to move bytes into. We'll also use it for bounds checking
        // since we don't require that these strings be null terminated.
        size_t dest_length = yp_string_length(string);
-        char *source_start = string->source;
+        uint8_t *source_start = (uint8_t *) string->source;

-        const char *source_cursor = source_start;
-        const char *source_end = source_cursor + dest_length;
+        const uint8_t *source_cursor = source_start;
+        const uint8_t *source_end = source_cursor + dest_length;

        // We're going to move bytes backward in the string when we get leading
        // whitespace, so we'll maintain a pointer to the current position in the
        // string that we're writing to.
-        char *dest_cursor = source_start;
+        uint8_t *dest_cursor = source_start;

        while (source_cursor < source_end) {
            // If we need to dedent the next element within the heredoc or the next
@ -9971,7 +9969,7 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu

            // At this point we have dedented all that we need to, so we need to find
            // the next newline.
-            const char *breakpoint = next_newline(source_cursor, source_end - source_cursor);
+            const uint8_t *breakpoint = next_newline(source_cursor, source_end - source_cursor);

            if (breakpoint == NULL) {
                // If there isn't another newline, then we can just move the rest of the
@ -13587,7 +13585,7 @@ yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
            uint32_t local_size = yp_metadata_read_u32(metadata);
            metadata += 4;

-            yp_parser_local_add_location(parser, metadata, metadata + local_size);
+            yp_parser_local_add_location(parser, (const uint8_t *) metadata, (const uint8_t *) (metadata + local_size));
            metadata += local_size;
        }
    }
@ -13599,7 +13597,7 @@ yp_parser_metadata(yp_parser_t *parser, const char *metadata) {

 // Initialize a parser with the given start and end pointers.
 YP_EXPORTED_FUNCTION void
-yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char *filepath) {
+yp_parser_init(yp_parser_t *parser, const uint8_t *source, size_t size, const char *filepath) {
    assert(source != NULL);

    // Set filepath to the file that was passed
@ -13671,7 +13669,7 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
    yp_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);

    // Skip past the UTF-8 BOM if it exists.
-    if (size >= 3 && (unsigned char) source[0] == 0xef && (unsigned char) source[1] == 0xbb && (unsigned char) source[2] == 0xbf) {
+    if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
        parser->current.end += 3;
        parser->encoding_comment_start += 3;
    }
@ -13679,7 +13677,7 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
    // If the first two bytes of the source are a shebang, then we'll indicate
    // that the encoding comment is at the end of the shebang.
    if (peek(parser) == '#' && peek_offset(parser, 1) == '!') {
-        const char *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
+        const uint8_t *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
        if (encoding_comment_start) {
            parser->encoding_comment_start = encoding_comment_start + 1;
        }
@ -13751,7 +13749,7 @@ yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
 // Parse and serialize the AST represented by the given source to the given
 // buffer.
 YP_EXPORTED_FUNCTION void
-yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *metadata) {
+yp_parse_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata) {
    yp_parser_t parser;
    yp_parser_init(&parser, source, size, NULL);
    if (metadata) yp_parser_metadata(&parser, metadata);
--- a/yarp/yarp.h
+++ b/yarp/yarp.h
@ -40,7 +40,7 @@ void yp_scope_node_init(yp_node_t *node, yp_scope_node_t *dest);
 YP_EXPORTED_FUNCTION const char * yp_version(void);

 // Initialize a parser with the given start and end pointers.
-YP_EXPORTED_FUNCTION void yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char *filepath);
+YP_EXPORTED_FUNCTION void yp_parser_init(yp_parser_t *parser, const uint8_t *source, size_t size, const char *filepath);

 // Register a callback that will be called whenever YARP changes the encoding it
 // is using to parse based on the magic comment.
@ -66,14 +66,14 @@ YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, y
 YP_EXPORTED_FUNCTION void yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);

 // Parse the given source to the AST and serialize the AST to the given buffer.
-YP_EXPORTED_FUNCTION void yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *metadata);
+YP_EXPORTED_FUNCTION void yp_parse_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata);

 // Lex the given source and serialize to the given buffer.
-YP_EXPORTED_FUNCTION void yp_lex_serialize(const char *source, size_t size, const char *filepath, yp_buffer_t *buffer);
+YP_EXPORTED_FUNCTION void yp_lex_serialize(const uint8_t *source, size_t size, const char *filepath, yp_buffer_t *buffer);

 // Parse and serialize both the AST and the tokens represented by the given
 // source to the given buffer.
-YP_EXPORTED_FUNCTION void yp_parse_lex_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *metadata);
+YP_EXPORTED_FUNCTION void yp_parse_lex_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata);

 // Returns a string representation of the given token type.
 YP_EXPORTED_FUNCTION const char * yp_token_type_to_str(yp_token_type_t token_type);