main.c (1577B)
/*
2018 David DiPaola
licensed under CC0 (public domain, see https://creativecommons.org/publicdomain/zero/1.0/)
*/

/*
Reads UTF-8 from standard input and prints one "U+XXXX" line per decoded code
point to standard output. Malformed input is reported on standard error with
the byte offset at which the problem was found; decoding then continues.
*/

#include <unistd.h>

#include <stdio.h>

/* Decoder state carried from one input byte to the next. */
struct utf8_decoder {
	unsigned int remaining;  /* continuation bytes still expected */
	unsigned int codepoint;  /* bits accumulated for the current sequence */
	unsigned int minimum;    /* smallest code point the current sequence length may encode */
	size_t       start;      /* offset of the current sequence's start byte */
	size_t       offset;     /* offset of the byte currently being processed */
};

/* Prints a code point as "U+" followed by at least four uppercase hex digits. */
static void
print_codepoint(unsigned int codepoint) {
	/* %04X pads to four digits and grows to five or six on its own */
	printf("U+%04X" "\n", codepoint);
}

/*
Classifies a byte as the first byte of a sequence.
Returns the number of continuation bytes that must follow (0 to 3),
or -1 if the byte cannot start a sequence.
*/
static int
continuation_count(unsigned char byte) {
	if (byte <= 0x7F) {
		return 0;
	}
	if ((byte & 0xE0) == 0xC0) {
		return 1;
	}
	if ((byte & 0xF0) == 0xE0) {
		return 2;
	}
	if ((byte & 0xF8) == 0xF0) {
		return 3;
	}
	return -1;
}

/*
Called once the last continuation byte of a sequence has been accumulated.
Prints the code point, or reports why the sequence is not well-formed UTF-8.
Sequence-level errors are reported at the offset of the sequence's start byte.
*/
static void
finish_sequence(const struct utf8_decoder *decoder) {
	unsigned int codepoint = decoder->codepoint;

	if (codepoint < decoder->minimum) {
		/* value would have fit in a shorter sequence */
		fprintf(stderr, "ERROR at offset 0x%zX: overlong encoding of U+%04X" "\n", decoder->start, codepoint);
	}
	else if ((codepoint >= 0xD800) && (codepoint <= 0xDFFF)) {
		fprintf(stderr, "ERROR at offset 0x%zX: surrogate code point: U+%04X" "\n", decoder->start, codepoint);
	}
	else if (codepoint > 0x10FFFF) {
		fprintf(stderr, "ERROR at offset 0x%zX: code point out of range: 0x%X" "\n", decoder->start, codepoint);
	}
	else {
		print_codepoint(codepoint);
	}
}

/*
Feeds one input byte (located at decoder->offset) through the decoder.
The caller is responsible for advancing decoder->offset afterwards.
*/
static void
decode_byte(struct utf8_decoder *decoder, unsigned char byte) {
	/* indexed by the number of continuation bytes announced by the start byte */
	static const unsigned char payload_mask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
	static const unsigned int  minimum[]      = { 0x0, 0x80, 0x800, 0x10000 };

	int count;

	if (decoder->remaining > 0) {
		if ((byte & 0xC0) == 0x80) {
			decoder->codepoint = (decoder->codepoint << 6) | (byte & 0x3Fu);
			decoder->remaining--;
			if (decoder->remaining == 0) {
				finish_sequence(decoder);
			}
			return;
		}

		fprintf(stderr, "ERROR at offset 0x%zX: invalid continuation byte: 0x%02X" "\n", decoder->offset, (unsigned int)byte);
		decoder->remaining = 0;
		decoder->codepoint = 0;

		/*
		The interrupted sequence is abandoned, but this byte is not a
		continuation byte, so it may begin a new sequence: decode it below
		rather than dropping it. If it cannot start a sequence either, the
		error above already covers it.
		*/
		count = continuation_count(byte);
		if (count < 0) {
			return;
		}
	}
	else {
		count = continuation_count(byte);
		if (count < 0) {
			fprintf(stderr, "ERROR at offset 0x%zX: invalid start byte: 0x%02X" "\n", decoder->offset, (unsigned int)byte);
			return;
		}
	}

	if (count == 0) {
		/* single-byte (ASCII) sequence is complete immediately */
		print_codepoint(byte);
		return;
	}

	decoder->codepoint = byte & payload_mask[count];
	decoder->minimum   = minimum[count];
	decoder->remaining = (unsigned int)count;
	decoder->start     = decoder->offset;
}

/*
Decodes standard input until end of file.
Returns 0 when all input was read (even if it contained malformed UTF-8,
which is only reported on standard error), or 1 on an I/O failure.
*/
int
main(void) {
	struct utf8_decoder decoder = { 0 };
	unsigned char buffer[4096];
	ssize_t count;

	/* read in chunks rather than issuing one system call per byte */
	while ((count = read(STDIN_FILENO, buffer, sizeof(buffer))) > 0) {
		for (ssize_t i = 0; i < count; i++) {
			decode_byte(&decoder, buffer[i]);
			decoder.offset++;
		}
	}

	if (count < 0) {
		/* a failed read is an error, not end of input */
		perror("read");
		return 1;
	}

	if (decoder.remaining > 0) {
		fprintf(stderr, "ERROR at offset 0x%zX: truncated sequence at end of input" "\n", decoder.start);
	}

	if (fflush(stdout) != 0) {
		perror("stdout");
		return 1;
	}

	return 0;
}