nix_utf8_encode

Encodes Unicode codepoints into UTF-8
git clone https://0xdd.org/code/nix_utf8_encode.git
Log | Files | Refs | README | LICENSE

commit 1e318d4886d1e8e4eff0c977bd0d41bf599c8d9f
Author: David DiPaola <DavidDiPaola@users.noreply.github.com>
Date:   Thu, 27 Dec 2018 22:14:03 -0500

initial commit

Diffstat:
A.gitignore | 4++++
ALICENSE | 117+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AMakefile | 15+++++++++++++++
AREADME | 10++++++++++
Amain.c | 89+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 235 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,4 @@ +utf8_encode + +.*.swp + diff --git a/LICENSE b/LICENSE @@ -0,0 +1,117 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. 
the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). 
Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. 
No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. 
/*
Remainder of the commit diff that shares these source lines with main.c
(end of LICENSE, Makefile, README and the main.c diff header), kept verbatim
with one diff line per row:

+
+For more information, please see
+<http://creativecommons.org/publicdomain/zero/1.0/>
+
diff --git a/Makefile b/Makefile
@@ -0,0 +1,15 @@
+#2018 David DiPaola
+#licensed under CC0 (public domain, see https://creativecommons.org/publicdomain/zero/1.0/)
+
+BIN = utf8_encode
+
+.PHONY: all
+all: $(BIN)
+
+$(BIN): main.c
+	$(CC) -Wall -Wextra -O2 $(CFLAGS) $< -o $@
+
+.PHONY: clean
+clean:
+	rm -rf $(BIN)
+
diff --git a/README b/README
@@ -0,0 +1,10 @@
+# nix_utf8_encode
+encode Unicode codepoints into UTF-8
+
+## usage
+`echo 'U+0024 U+00A2 U+0939 U+20AC U+10348' | utf8_encode`
+
+## building and installing
+ - build: `make`
+ - install: `sudo cp utf8_encode /usr/local/bin/`
+
diff --git a/main.c b/main.c
@@ -0,0 +1,89 @@
*/

/*
2018 David DiPaola
licensed under CC0 (public domain, see https://creativecommons.org/publicdomain/zero/1.0/)
*/

#include <unistd.h>

#include <errno.h>
#include <stdio.h>

/* Largest codepoint the encoder accepts; larger values are rejected. */
#define CODEPOINT_MAX 0x10FFFFu

/*
Converts one ASCII hexadecimal digit to its numeric value.

Returns 0x0..0xF for '0'-'9', 'A'-'F' and 'a'-'f', and 0xFF for every other
byte, so callers can test "result <= 0xF" to recognise a hex digit.
*/
static unsigned char
_byte_tohex(unsigned char byte) {
	if ((byte >= '0') && (byte <= '9')) {
		return (byte - '0') + 0x0;
	}
	else if ((byte >= 'A') && (byte <= 'F')) {
		return (byte - 'A') + 0xA;
	}
	else if ((byte >= 'a') && (byte <= 'f')) {
		return (byte - 'a') + 0xA;
	}
	else {
		return 0xFF;
	}
}

/*
Encodes one codepoint as UTF-8 and writes the bytes to standard output.

Returns 0 on success, 1 if the codepoint is above CODEPOINT_MAX (nothing is
written, a message goes to stderr) and -1 if write() failed (reported with
perror()).

NOTE(review): U+D800..U+DFFF (surrogates) are still encoded, as before,
although RFC 3629 excludes them from UTF-8 -- decide whether to reject them.
*/
static int
_utf8_write(unsigned int codepoint) {
	/* unsigned char: lead and continuation bytes are all above 0x7F */
	unsigned char bytes[4];
	size_t bytes_size;

	if (codepoint <= 0x7F) {
		bytes[0] = (unsigned char)codepoint;
		bytes_size = 1;
	}
	else if (codepoint <= 0x7FF) {
		bytes[0] = (unsigned char)(0xC0 | ((codepoint >> (6 * 1)) & 0x1F));
		bytes[1] = (unsigned char)(0x80 | ((codepoint >> (6 * 0)) & 0x3F));
		bytes_size = 2;
	}
	else if (codepoint <= 0xFFFF) {
		bytes[0] = (unsigned char)(0xE0 | ((codepoint >> (6 * 2)) & 0x0F));
		bytes[1] = (unsigned char)(0x80 | ((codepoint >> (6 * 1)) & 0x3F));
		bytes[2] = (unsigned char)(0x80 | ((codepoint >> (6 * 0)) & 0x3F));
		bytes_size = 3;
	}
	else if (codepoint <= CODEPOINT_MAX) {
		bytes[0] = (unsigned char)(0xF0 | ((codepoint >> (6 * 3)) & 0x07));
		bytes[1] = (unsigned char)(0x80 | ((codepoint >> (6 * 2)) & 0x3F));
		bytes[2] = (unsigned char)(0x80 | ((codepoint >> (6 * 1)) & 0x3F));
		bytes[3] = (unsigned char)(0x80 | ((codepoint >> (6 * 0)) & 0x3F));
		bytes_size = 4;
	}
	else {
		/* previously fell through and read uninitialized locals */
		fputs("codepoint out of range (maximum is U+10FFFF)\n", stderr);
		return 1;
	}

	/* write() may be interrupted or write fewer bytes than asked: retry so a
	   multi-byte sequence is never left truncated on stdout */
	for (size_t done = 0; done < bytes_size; ) {
		ssize_t status = write(STDOUT_FILENO, bytes + done, bytes_size - done);
		if (status < 0) {
			if (errno == EINTR) {
				continue;
			}
			perror(NULL);
			return -1;
		}
		done += (size_t)status;
	}

	return 0;
}

/*
Reads standard input one byte at a time, treats every run of hexadecimal
digits as one codepoint and writes its UTF-8 encoding to standard output.
All other bytes (such as the "U+" prefix or whitespace) only act as
separators.

Returns 0 when every codepoint was written, 1 if a codepoint was out of range
or an I/O error occurred.
*/
int
main(void) {
	unsigned char byte;
	int sawhex = 0;
	int exit_status = 0;
	unsigned int codepoint = 0;

	for (;;) {
		ssize_t got = read(STDIN_FILENO, &byte, sizeof(byte));
		if (got < 0) {
			if (errno == EINTR) {
				continue;
			}
			perror(NULL);
			/* input is incomplete: do not emit a half-read codepoint */
			return 1;
		}
		if (got == 0) {
			break;
		}

		unsigned char byte_hex = _byte_tohex(byte);
		if (byte_hex <= 0xF) {
			sawhex = 1;
			/* stop shifting once out of range so a long digit run cannot
			   wrap around into a valid-looking codepoint */
			if (codepoint <= CODEPOINT_MAX) {
				codepoint = (codepoint << 4) | byte_hex;
			}
		}
		else if (sawhex) {
			int rc = _utf8_write(codepoint);
			if (rc < 0) {
				return 1;
			}
			if (rc > 0) {
				exit_status = 1;
			}
			codepoint = 0;
			sawhex = 0;
		}
	}

	/* input ended right after a hex digit: emit the pending codepoint */
	if (sawhex && (_utf8_write(codepoint) != 0)) {
		exit_status = 1;
	}

	return exit_status;
}