main.c (2121B)
/*
2018 David DiPaola
licensed under CC0 (public domain, see https://creativecommons.org/publicdomain/zero/1.0/)
*/

#include <unistd.h>

#include <stdio.h>

/*
Convert one ASCII hexadecimal digit to its numeric value.

byte: the character to convert.

Returns 0x0..0xF for '0'-'9', 'A'-'F' and 'a'-'f' (case-insensitive), or
0xFF for any other byte so callers can tell "not a hex digit" apart from
every valid result.
*/
static unsigned char
_byte_tohex(unsigned char byte) {
	if ((byte >= '0') && (byte <= '9')) {
		return (byte - '0') + 0x0;
	}
	else if ((byte >= 'A') && (byte <= 'F')) {
		return (byte - 'A') + 0xA;
	}
	else if ((byte >= 'a') && (byte <= 'f')) {
		return (byte - 'a') + 0xA;
	}
	else {
		return 0xFF;
	}
}

/*
Encode a codepoint as a UTF-8 byte sequence.

codepoint: the value to encode.
out:       receives the encoded bytes; must have room for 4 bytes.

Returns the number of bytes stored in out (1 to 4), or 0 when codepoint is
greater than 0x10FFFF and therefore has no 1- to 4-byte encoding.

NOTE(review): values in the surrogate range 0xD800..0xDFFF are encoded like
any other 3-byte value; RFC 3629 forbids them in UTF-8, so consider
rejecting them here as well.
*/
static size_t
_utf8_encode(unsigned int codepoint, unsigned char out[4]) {
	if (codepoint <= 0x7F) {
		/* 0xxxxxxx */
		out[0] = (unsigned char)codepoint;
		return 1;
	}
	if (codepoint <= 0x7FF) {
		/* 110xxxxx 10xxxxxx */
		out[0] = (unsigned char)(0xC0 | ((codepoint >> (6 * 1)) & 0x1F));
		out[1] = (unsigned char)(0x80 | ((codepoint >> (6 * 0)) & 0x3F));
		return 2;
	}
	if (codepoint <= 0xFFFF) {
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		out[0] = (unsigned char)(0xE0 | ((codepoint >> (6 * 2)) & 0x0F));
		out[1] = (unsigned char)(0x80 | ((codepoint >> (6 * 1)) & 0x3F));
		out[2] = (unsigned char)(0x80 | ((codepoint >> (6 * 0)) & 0x3F));
		return 3;
	}
	if (codepoint <= 0x10FFFF) {
		/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		out[0] = (unsigned char)(0xF0 | ((codepoint >> (6 * 3)) & 0x07));
		out[1] = (unsigned char)(0x80 | ((codepoint >> (6 * 2)) & 0x3F));
		out[2] = (unsigned char)(0x80 | ((codepoint >> (6 * 1)) & 0x3F));
		out[3] = (unsigned char)(0x80 | ((codepoint >> (6 * 0)) & 0x3F));
		return 4;
	}

	return 0;
}

/*
Write a codepoint to standard output as UTF-8.

codepoint: the value to write.

A codepoint above 0x10FFFF is reported on standard error and nothing is
written. A failing write() is reported with perror() and the rest of the
sequence is abandoned.
*/
static void
_utf8_write(unsigned int codepoint) {
	unsigned char bytes[4];
	size_t bytes_size = _utf8_encode(codepoint, bytes);
	if (bytes_size == 0) {
		fprintf(stderr, "codepoint 0x%X is above 0x10FFFF\n", codepoint);
		return;
	}

	/* write() may accept fewer bytes than requested, so keep going until
	   the whole sequence is out */
	size_t written = 0;
	while (written < bytes_size) {
		/* keep the result signed: comparing -1 against a size_t would
		   convert it to a huge value and hide the error */
		ssize_t status = write(STDOUT_FILENO, bytes + written, bytes_size - written);
		if (status < 0) {
			perror(NULL);
			return;
		}
		written += (size_t)status;
	}
}

/*
Read standard input one byte at a time, treat every run of consecutive
hexadecimal digits as one codepoint, and write that codepoint to standard
output as UTF-8. Any non-hex byte ends the current run and is otherwise
ignored.

Returns 0 on end of input, 1 if reading standard input failed.
*/
int
main(void) {
	unsigned char byte;
	int sawhex = 0;
	unsigned int codepoint = 0;
	ssize_t status;
	while ((status = read(STDIN_FILENO, &byte, sizeof(byte))) == (ssize_t)sizeof(byte)) {
		unsigned char byte_hex = _byte_tohex(byte);
		if (byte_hex <= 0xF) {
			sawhex = 1;
			codepoint = (codepoint << 4) | byte_hex;
		}
		else if (sawhex) {
			_utf8_write(codepoint);
			codepoint = 0;
			sawhex = 0;
		}
	}

	if (status < 0) {
		/* the pending run may be cut short, so do not emit it */
		perror(NULL);
		return 1;
	}

	/* input that ends right after a hex digit has no terminating byte to
	   trigger the write above, so emit the final run here */
	if (sawhex) {
		_utf8_write(codepoint);
	}

	return 0;
}