commit 1e318d4886d1e8e4eff0c977bd0d41bf599c8d9f
Author: David DiPaola <DavidDiPaola@users.noreply.github.com>
Date: Thu, 27 Dec 2018 22:14:03 -0500
initial commit
Diffstat:
A | .gitignore | | | 4 | ++++ |
A | LICENSE | | | 117 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | Makefile | | | 15 | +++++++++++++++ |
A | README | | | 10 | ++++++++++ |
A | main.c | | | 89 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
5 files changed, 235 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,4 @@
+utf8_encode
+
+.*.swp
+
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,117 @@
+CC0 1.0 Universal
+
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator and
+subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for the
+purpose of contributing to a commons of creative, cultural and scientific
+works ("Commons") that the public can reliably and without fear of later
+claims of infringement build upon, modify, incorporate in other works, reuse
+and redistribute as freely as possible in any form whatsoever and for any
+purposes, including without limitation commercial purposes. These owners may
+contribute to the Commons to promote the ideal of a free culture and the
+further production of creative, cultural and scientific works, or to gain
+reputation or greater distribution for their Work in part through the use and
+efforts of others.
+
+For these and/or other purposes and motivations, and without any expectation
+of additional consideration or compensation, the person associating CC0 with a
+Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
+and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
+and publicly distribute the Work under its terms, with knowledge of his or her
+Copyright and Related Rights in the Work and the meaning and intended legal
+effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not limited
+to, the following:
+
+ i. the right to reproduce, adapt, distribute, perform, display, communicate,
+ and translate a Work;
+
+ ii. moral rights retained by the original author(s) and/or performer(s);
+
+ iii. publicity and privacy rights pertaining to a person's image or likeness
+ depicted in a Work;
+
+ iv. rights protecting against unfair competition in regards to a Work,
+ subject to the limitations in paragraph 4(a), below;
+
+ v. rights protecting the extraction, dissemination, use and reuse of data in
+ a Work;
+
+ vi. database rights (such as those arising under Directive 96/9/EC of the
+ European Parliament and of the Council of 11 March 1996 on the legal
+ protection of databases, and under any national implementation thereof,
+ including any amended or successor version of such directive); and
+
+ vii. other similar, equivalent or corresponding rights throughout the world
+ based on applicable law or treaty, and any national implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention of,
+applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
+unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
+and Related Rights and associated claims and causes of action, whether now
+known or unknown (including existing as well as future claims and causes of
+action), in the Work (i) in all territories worldwide, (ii) for the maximum
+duration provided by applicable law or treaty (including future time
+extensions), (iii) in any current or future medium and for any number of
+copies, and (iv) for any purpose whatsoever, including without limitation
+commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
+the Waiver for the benefit of each member of the public at large and to the
+detriment of Affirmer's heirs and successors, fully intending that such Waiver
+shall not be subject to revocation, rescission, cancellation, termination, or
+any other legal or equitable action to disrupt the quiet enjoyment of the Work
+by the public as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason be
+judged legally invalid or ineffective under applicable law, then the Waiver
+shall be preserved to the maximum extent permitted taking into account
+Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
+is so judged Affirmer hereby grants to each affected person a royalty-free,
+non transferable, non sublicensable, non exclusive, irrevocable and
+unconditional license to exercise Affirmer's Copyright and Related Rights in
+the Work (i) in all territories worldwide, (ii) for the maximum duration
+provided by applicable law or treaty (including future time extensions), (iii)
+in any current or future medium and for any number of copies, and (iv) for any
+purpose whatsoever, including without limitation commercial, advertising or
+promotional purposes (the "License"). The License shall be deemed effective as
+of the date CC0 was applied by Affirmer to the Work. Should any part of the
+License for any reason be judged legally invalid or ineffective under
+applicable law, such partial invalidity or ineffectiveness shall not
+invalidate the remainder of the License, and in such case Affirmer hereby
+affirms that he or she will not (i) exercise any of his or her remaining
+Copyright and Related Rights in the Work or (ii) assert any associated claims
+and causes of action with respect to the Work, in either case contrary to
+Affirmer's express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
+ surrendered, licensed or otherwise affected by this document.
+
+ b. Affirmer offers the Work as-is and makes no representations or warranties
+ of any kind concerning the Work, express, implied, statutory or otherwise,
+ including without limitation warranties of title, merchantability, fitness
+ for a particular purpose, non infringement, or the absence of latent or
+ other defects, accuracy, or the present or absence of errors, whether or not
+ discoverable, all to the greatest extent permissible under applicable law.
+
+ c. Affirmer disclaims responsibility for clearing rights of other persons
+ that may apply to the Work or any use thereof, including without limitation
+ any person's Copyright and Related Rights in the Work. Further, Affirmer
+ disclaims responsibility for obtaining any necessary consents, permissions
+ or other rights required for any use of the Work.
+
+ d. Affirmer understands and acknowledges that Creative Commons is not a
+ party to this document and has no duty or obligation with respect to this
+ CC0 or use of the Work.
+
+For more information, please see
+<http://creativecommons.org/publicdomain/zero/1.0/>
+
diff --git a/Makefile b/Makefile
@@ -0,0 +1,15 @@
+#2018 David DiPaola
+#licensed under CC0 (public domain, see https://creativecommons.org/publicdomain/zero/1.0/)
+
+# name of the program that is built from main.c
+BIN = utf8_encode
+
+.PHONY: all
+all: $(BIN)
+
+# recipe lines must start with a tab character, not spaces
+$(BIN): main.c
+	$(CC) -Wall -Wextra -O2 $(CFLAGS) $< -o $@
+
+# $(BIN) is a regular file, so a non-recursive remove is sufficient
+.PHONY: clean
+clean:
+	rm -f $(BIN)
+
diff --git a/README b/README
@@ -0,0 +1,10 @@
+# nix_utf8_encode
+Encode Unicode codepoints, given as hexadecimal numbers with an optional `U+` prefix, into UTF-8.
+
+## usage
+`echo 'U+0024 U+00A2 U+0939 U+20AC U+10348' | utf8_encode`
+
+## building and installing
+ - build: `make`
+ - install: `sudo cp utf8_encode /usr/local/bin/`
+
diff --git a/main.c b/main.c
@@ -0,0 +1,89 @@
+/*
+2018 David DiPaola
+licensed under CC0 (public domain, see https://creativecommons.org/publicdomain/zero/1.0/)
+*/
+
+#include <unistd.h>
+
+#include <stdio.h>
+
+/*
+Convert one ASCII hexadecimal digit to its numeric value.
+
+byte: the character to decode ('0'-'9', 'A'-'F' or 'a'-'f')
+returns: the value of the digit (0x0 to 0xF), or 0xFF if byte is not a hex digit
+*/
+static unsigned char
+_byte_tohex(unsigned char byte) {
+    unsigned char value = 0xFF;  /* sentinel: not a hex digit */
+
+    if (('0' <= byte) && (byte <= '9')) {
+        value = (unsigned char)(byte - '0');
+    }
+    else if (('a' <= byte) && (byte <= 'f')) {
+        value = (unsigned char)(0xA + (byte - 'a'));
+    }
+    else if (('A' <= byte) && (byte <= 'F')) {
+        value = (unsigned char)(0xA + (byte - 'A'));
+    }
+
+    return value;
+}
+
+/*
+Encode one Unicode codepoint as UTF-8 and write the bytes to standard output.
+
+codepoint: the value to encode
+
+Values above U+10FFFF and the surrogate range U+D800..U+DFFF have no UTF-8
+encoding; for those a message is printed to standard error and nothing is
+written to standard output.
+
+If write() fails, the error is reported with perror() and the rest of the
+sequence is dropped.
+*/
+static void
+_utf8_write(unsigned int codepoint) {
+    unsigned char bytes[4];  /* unsigned: lead and continuation bytes are >= 0x80 */
+    size_t bytes_size;
+    size_t bytes_done = 0;
+
+    if ((codepoint > 0x10FFFF) || ((codepoint >= 0xD800) && (codepoint <= 0xDFFF))) {
+        fprintf(stderr, "U+%04X: not a valid Unicode codepoint\n", codepoint);
+        return;
+    }
+
+    if (codepoint <= 0x7F) {
+        /* 0xxxxxxx */
+        bytes[0] = (unsigned char)codepoint;
+        bytes_size = 1;
+    }
+    else if (codepoint <= 0x7FF) {
+        /* 110xxxxx 10xxxxxx */
+        bytes[0] = (unsigned char)(0xC0 | ((codepoint >> (6 * 1)) & 0x1F));
+        bytes[1] = (unsigned char)(0x80 | ((codepoint >> (6 * 0)) & 0x3F));
+        bytes_size = 2;
+    }
+    else if (codepoint <= 0xFFFF) {
+        /* 1110xxxx 10xxxxxx 10xxxxxx */
+        bytes[0] = (unsigned char)(0xE0 | ((codepoint >> (6 * 2)) & 0x0F));
+        bytes[1] = (unsigned char)(0x80 | ((codepoint >> (6 * 1)) & 0x3F));
+        bytes[2] = (unsigned char)(0x80 | ((codepoint >> (6 * 0)) & 0x3F));
+        bytes_size = 3;
+    }
+    else {
+        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+        bytes[0] = (unsigned char)(0xF0 | ((codepoint >> (6 * 3)) & 0x07));
+        bytes[1] = (unsigned char)(0x80 | ((codepoint >> (6 * 2)) & 0x3F));
+        bytes[2] = (unsigned char)(0x80 | ((codepoint >> (6 * 1)) & 0x3F));
+        bytes[3] = (unsigned char)(0x80 | ((codepoint >> (6 * 0)) & 0x3F));
+        bytes_size = 4;
+    }
+
+    /* write() may accept fewer bytes than requested, so continue until all are out */
+    while (bytes_done < bytes_size) {
+        ssize_t status = write(STDOUT_FILENO, bytes + bytes_done, bytes_size - bytes_done);
+        if (status <= 0) {
+            /* only a negative result sets errno; 0 means no progress, so give up quietly */
+            if (status < 0) {
+                perror(NULL);
+            }
+            return;
+        }
+        bytes_done += (size_t)status;
+    }
+}
+
+/*
+Read text from standard input and pass every hexadecimal number found in it to
+_utf8_write() as a codepoint.
+
+Each unbroken run of hex digits forms one codepoint. Every other byte (the
+"U+" prefix, spaces, newlines, ...) only ends the run in progress.
+
+returns: 0 once the input is exhausted, 1 if reading standard input failed
+*/
+int
+main(void) {
+    unsigned char byte;
+    int sawhex = 0;
+    unsigned int codepoint = 0;
+    ssize_t nread;
+
+    while ((nread = read(STDIN_FILENO, &byte, sizeof(byte))) == (ssize_t)sizeof(byte)) {
+        unsigned char byte_hex = _byte_tohex(byte);
+        if (byte_hex <= 0xF) {
+            sawhex = 1;
+            /*
+            Stop accumulating once the value is past U+10FFFF: it stays out of
+            range instead of wrapping around to an unrelated codepoint.
+            */
+            if (codepoint <= 0x10FFFF) {
+                codepoint = (codepoint << 4) | byte_hex;
+            }
+        }
+        else if (sawhex) {
+            _utf8_write(codepoint);
+            codepoint = 0;
+            sawhex = 0;
+        }
+    }
+
+    if (nread < 0) {
+        /* a read error is not end of input; any partial run is not trusted */
+        perror(NULL);
+        return 1;
+    }
+
+    /* input that ends directly after a hex digit still has a codepoint pending */
+    if (sawhex) {
+        _utf8_write(codepoint);
+    }
+
+    return 0;
+}
+