From f1998c321a4eec6d75b58d84aa8610971bf21979 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Sat, 31 Jul 2021 11:35:39 -0600 Subject: move static files into static sub-dir, refactor nix a bit --- .../2013-04-09-erlang-tcp-socket-pull-pattern.md | 257 +++++++++ static/src/_posts/2013-07-11-goplus.md | 78 +++ static/src/_posts/2013-10-08-generations.md | 101 ++++ static/src/_posts/2013-10-25-namecoind-ssl.md | 249 +++++++++ static/src/_posts/2014-01-11-diamond-square.md | 495 +++++++++++++++++ static/src/_posts/2014-10-29-erlang-pitfalls.md | 193 +++++++ static/src/_posts/2015-03-11-rabbit-hole.md | 166 ++++++ static/src/_posts/2015-07-15-go-http.md | 547 +++++++++++++++++++ static/src/_posts/2015-11-21-happy-trees.md | 236 +++++++++ static/src/_posts/2017-09-06-brian-bars.md | 105 ++++ .../src/_posts/2018-10-25-rethinking-identity.md | 293 ++++++++++ static/src/_posts/2018-11-12-viz-1.md | 55 ++ static/src/_posts/2018-11-12-viz-2.md | 50 ++ ...19-08-02-program-structure-and-composability.md | 588 +++++++++++++++++++++ .../src/_posts/2020-04-26-trading-in-the-rain.md | 56 ++ static/src/_posts/2020-05-30-denver-protests.md | 161 ++++++ static/src/_posts/2020-07-07-viz-3.md | 155 ++++++ .../2020-11-16-component-oriented-programming.md | 353 +++++++++++++ .../_posts/2021-01-01-new-year-new-resolution.md | 50 ++ static/src/_posts/2021-01-09-ginger.md | 354 +++++++++++++ static/src/_posts/2021-01-14-the-web.md | 241 +++++++++ .../src/_posts/2021-01-23-goodbye-github-pages.md | 247 +++++++++ .../_posts/2021-01-30-building-mobile-nebula.md | 390 ++++++++++++++ static/src/_posts/2021-02-06-old-code-new-ideas.md | 224 ++++++++ .../2021-02-13-building-gomobile-using-nix.md | 232 ++++++++ static/src/_posts/2021-02-25-married.md | 18 + .../_posts/2021-03-01-conditionals-in-ginger.md | 195 +++++++ .../2021-03-04-conditionals-in-ginger-errata.md | 195 +++++++ static/src/_posts/2021-03-12-ripple-a-game.md | 311 +++++++++++ .../2021-03-20-a-simple-rule-for-better-errors.md | 
227 ++++++++ static/src/_posts/2021-04-01-fmail.md | 172 ++++++ ...2021-04-06-evaluation-of-network-filesystems.md | 339 ++++++++++++ static/src/_posts/2021-04-11-ripple-v2.md | 436 +++++++++++++++ ...sing-processes-into-a-static-binary-with-nix.md | 248 +++++++++ static/src/_posts/2021-04-27-loops-in-ginger.md | 223 ++++++++ static/src/_posts/2021-05-02-nfts.md | 349 ++++++++++++ static/src/_posts/2021-05-11-ripple-v3.md | 442 ++++++++++++++++ .../2021-05-16-new-years-resolution-vibe-check.md | 62 +++ static/src/_posts/2021-05-26-viz-4.md | 213 ++++++++ static/src/_posts/2021-05-28-viz-5.md | 306 +++++++++++ static/src/_posts/2021-06-07-adventures-in-defi.md | 271 ++++++++++ static/src/_posts/2021-06-23-viz-6.md | 402 ++++++++++++++ .../2021-06-26-selfhosted-email-with-maddy.md | 277 ++++++++++ static/src/_posts/2021-07-01-viz-7.md | 440 +++++++++++++++ static/src/_posts/2021-07-06-maddy-vps.md | 115 ++++ .../_posts/2021-07-14-how-to-secure-a-webapp.md | 315 +++++++++++ static/src/_posts/2021-07-18-radix-v4.md | 248 +++++++++ 47 files changed, 11680 insertions(+) create mode 100644 static/src/_posts/2013-04-09-erlang-tcp-socket-pull-pattern.md create mode 100644 static/src/_posts/2013-07-11-goplus.md create mode 100644 static/src/_posts/2013-10-08-generations.md create mode 100644 static/src/_posts/2013-10-25-namecoind-ssl.md create mode 100644 static/src/_posts/2014-01-11-diamond-square.md create mode 100644 static/src/_posts/2014-10-29-erlang-pitfalls.md create mode 100644 static/src/_posts/2015-03-11-rabbit-hole.md create mode 100644 static/src/_posts/2015-07-15-go-http.md create mode 100644 static/src/_posts/2015-11-21-happy-trees.md create mode 100644 static/src/_posts/2017-09-06-brian-bars.md create mode 100644 static/src/_posts/2018-10-25-rethinking-identity.md create mode 100644 static/src/_posts/2018-11-12-viz-1.md create mode 100644 static/src/_posts/2018-11-12-viz-2.md create mode 100644 
static/src/_posts/2019-08-02-program-structure-and-composability.md create mode 100644 static/src/_posts/2020-04-26-trading-in-the-rain.md create mode 100644 static/src/_posts/2020-05-30-denver-protests.md create mode 100644 static/src/_posts/2020-07-07-viz-3.md create mode 100644 static/src/_posts/2020-11-16-component-oriented-programming.md create mode 100644 static/src/_posts/2021-01-01-new-year-new-resolution.md create mode 100644 static/src/_posts/2021-01-09-ginger.md create mode 100644 static/src/_posts/2021-01-14-the-web.md create mode 100644 static/src/_posts/2021-01-23-goodbye-github-pages.md create mode 100644 static/src/_posts/2021-01-30-building-mobile-nebula.md create mode 100644 static/src/_posts/2021-02-06-old-code-new-ideas.md create mode 100644 static/src/_posts/2021-02-13-building-gomobile-using-nix.md create mode 100644 static/src/_posts/2021-02-25-married.md create mode 100644 static/src/_posts/2021-03-01-conditionals-in-ginger.md create mode 100644 static/src/_posts/2021-03-04-conditionals-in-ginger-errata.md create mode 100644 static/src/_posts/2021-03-12-ripple-a-game.md create mode 100644 static/src/_posts/2021-03-20-a-simple-rule-for-better-errors.md create mode 100644 static/src/_posts/2021-04-01-fmail.md create mode 100644 static/src/_posts/2021-04-06-evaluation-of-network-filesystems.md create mode 100644 static/src/_posts/2021-04-11-ripple-v2.md create mode 100644 static/src/_posts/2021-04-22-composing-processes-into-a-static-binary-with-nix.md create mode 100644 static/src/_posts/2021-04-27-loops-in-ginger.md create mode 100644 static/src/_posts/2021-05-02-nfts.md create mode 100644 static/src/_posts/2021-05-11-ripple-v3.md create mode 100644 static/src/_posts/2021-05-16-new-years-resolution-vibe-check.md create mode 100644 static/src/_posts/2021-05-26-viz-4.md create mode 100644 static/src/_posts/2021-05-28-viz-5.md create mode 100644 static/src/_posts/2021-06-07-adventures-in-defi.md create mode 100644 
static/src/_posts/2021-06-23-viz-6.md create mode 100644 static/src/_posts/2021-06-26-selfhosted-email-with-maddy.md create mode 100644 static/src/_posts/2021-07-01-viz-7.md create mode 100644 static/src/_posts/2021-07-06-maddy-vps.md create mode 100644 static/src/_posts/2021-07-14-how-to-secure-a-webapp.md create mode 100644 static/src/_posts/2021-07-18-radix-v4.md (limited to 'static/src/_posts') diff --git a/static/src/_posts/2013-04-09-erlang-tcp-socket-pull-pattern.md b/static/src/_posts/2013-04-09-erlang-tcp-socket-pull-pattern.md new file mode 100644 index 0000000..4c9151f --- /dev/null +++ b/static/src/_posts/2013-04-09-erlang-tcp-socket-pull-pattern.md @@ -0,0 +1,257 @@ +--- +title: "Erlang, tcp sockets, and active true" +description: >- + Using `{active:once}` isn't always the best way to handle connections. +tags: tech +--- + +If you don't know erlang then [you're missing out][0]. If you do know erlang, +you've probably at some point done something with tcp sockets. Erlang's highly +concurrent model of execution lends itself well to server programs where a high +number of active connections is desired. Each thread can autonomously handle its +single client, greatly simplifying the logic of the whole application while +still retaining [great performance characteristics][1]. + +## Background + +For an erlang thread which owns a single socket there are three different ways +to receive data off of that socket. These all revolve around the `active` +[setopts][2] flag. A socket can be set to one of: + +* `{active,false}` - All data must be obtained through [recv/2][3] calls. This + amounts to synchronous socket reading. + +* `{active,true}` - All data on the socket gets sent to the controlling thread + as a normal erlang message. It is the thread's + responsibility to keep up with the buffered data in the + message queue. This amounts to asynchronous socket reading. + +* `{active,once}` - When set the socket is placed in `{active,true}` for a + single packet.
That is, once set the thread can expect a + single message to be sent to when data comes in. To receive + any more data off of the socket the socket must either be + read from using [recv/2][3] or be put in `{active,once}` or + `{active,true}`. + +## Which to use? + +Many (most?) tutorials advocate using `{active,once}` in your application +\[0]\[1]\[2]. This has to do with usability and security. When in `{active,true}` +it's possible for a client to flood the connection faster than the receiving +process will process those messages, potentially eating up a lot of memory in +the VM. However, if you want to be able to receive both tcp data messages as +well as other messages from other erlang processes at the same time you can't +use `{active,false}`. So `{active,once}` is generally preferred because it +deals with both of these problems quite well. + +## Why not to use `{active,once}` + +Here's what your classic `{active,once}` enabled tcp socket implementation will +probably look like: + +```erlang +-module(tcp_test). +-compile(export_all). + +-define(TCP_OPTS, [ + binary, + {packet, raw}, + {nodelay,true}, + {active, false}, + {reuseaddr, true}, + {keepalive,true}, + {backlog,500} +]). + +%Start listening +listen(Port) -> + {ok, L} = gen_tcp:listen(Port, ?TCP_OPTS), + ?MODULE:accept(L). + +%Accept a connection +accept(L) -> + {ok, Socket} = gen_tcp:accept(L), + ?MODULE:read_loop(Socket), + io:fwrite("Done reading, connection was closed\n"), + ?MODULE:accept(L). + +%Read everything it sends us +read_loop(Socket) -> + inet:setopts(Socket, [{active, once}]), + receive + {tcp, _, _} -> + do_stuff_here, + ?MODULE:read_loop(Socket); + {tcp_closed, _}-> donezo; + {tcp_error, _, _} -> donezo + end. +``` + +This code isn't actually usable for a production system; it doesn't even spawn a +new process for the new socket. But that's not the point I'm making. 
If I run it +with `tcp_test:listen(8000)`, and in other window do: + +```bash +while [ 1 ]; do echo "aloha"; done | nc localhost 8000 +``` + +We'll be flooding the the server with data pretty well. Using [eprof][4] we can +get an idea of how our code performs, and where the hang-ups are: + +```erlang +1> eprof:start(). +{ok,<0.34.0>} + +2> P = spawn(tcp_test,listen,[8000]). +<0.36.0> + +3> eprof:start_profiling([P]). +profiling + +4> running_the_while_loop. +running_the_while_loop + +5> eprof:stop_profiling(). +profiling_stopped + +6> eprof:analyze(procs,[{sort,time}]). + +****** Process <0.36.0> -- 100.00 % of profiled time *** +FUNCTION CALLS % TIME [uS / CALLS] +-------- ----- --- ---- [----------] +prim_inet:type_value_2/2 6 0.00 0 [ 0.00] + +....snip.... + +prim_inet:enc_opts/2 6 0.00 8 [ 1.33] +prim_inet:setopts/2 12303599 1.85 1466319 [ 0.12] +tcp_test:read_loop/1 12303598 2.22 1761775 [ 0.14] +prim_inet:encode_opt_val/1 12303599 3.50 2769285 [ 0.23] +prim_inet:ctl_cmd/3 12303600 4.29 3399333 [ 0.28] +prim_inet:enc_opt_val/2 24607203 5.28 4184818 [ 0.17] +inet:setopts/2 12303598 5.72 4533863 [ 0.37] +erlang:port_control/3 12303600 77.13 61085040 [ 4.96] +``` + +eprof shows us where our process is spending the majority of its time. The `%` +column indicates percentage of time the process spent during profiling inside +any function. We can pretty clearly see that the vast majority of time was spent +inside `erlang:port_control/3`, the BIF that `inet:setopts/2` uses to switch the +socket to `{active,once}` mode. Amongst the calls which were called on every +loop, it takes up by far the most amount of time. In addition all of those other +calls are also related to `inet:setopts/2`. + +I'm gonna rewrite our little listen server to use `{active,true}`, and we'll do +it all again: + +```erlang +-module(tcp_test). +-compile(export_all). 
+ +-define(TCP_OPTS, [ + binary, + {packet, raw}, + {nodelay,true}, + {active, false}, + {reuseaddr, true}, + {keepalive,true}, + {backlog,500} +]). + +%Start listening +listen(Port) -> + {ok, L} = gen_tcp:listen(Port, ?TCP_OPTS), + ?MODULE:accept(L). + +%Accept a connection +accept(L) -> + {ok, Socket} = gen_tcp:accept(L), + inet:setopts(Socket, [{active, true}]), %Well this is new + ?MODULE:read_loop(Socket), + io:fwrite("Done reading, connection was closed\n"), + ?MODULE:accept(L). + +%Read everything it sends us +read_loop(Socket) -> + %inet:setopts(Socket, [{active, once}]), + receive + {tcp, _, _} -> + do_stuff_here, + ?MODULE:read_loop(Socket); + {tcp_closed, _}-> donezo; + {tcp_error, _, _} -> donezo + end. +``` + +And the profiling results: + +```erlang +1> eprof:start(). +{ok,<0.34.0>} + +2> P = spawn(tcp_test,listen,[8000]). +<0.36.0> + +3> eprof:start_profiling([P]). +profiling + +4> running_the_while_loop. +running_the_while_loop + +5> eprof:stop_profiling(). +profiling_stopped + +6> eprof:analyze(procs,[{sort,time}]). + +****** Process <0.36.0> -- 100.00 % of profiled time *** +FUNCTION CALLS % TIME [uS / CALLS] +-------- ----- --- ---- [----------] +prim_inet:enc_value_1/3 7 0.00 1 [ 0.14] +prim_inet:decode_opt_val/1 1 0.00 1 [ 1.00] +inet:setopts/2 1 0.00 2 [ 2.00] +prim_inet:setopts/2 2 0.00 2 [ 1.00] +prim_inet:enum_name/2 1 0.00 2 [ 2.00] +erlang:port_set_data/2 1 0.00 2 [ 2.00] +inet_db:register_socket/2 1 0.00 3 [ 3.00] +prim_inet:type_value_1/3 7 0.00 3 [ 0.43] + +.... snip .... 
+ +prim_inet:type_opt_1/1 19 0.00 7 [ 0.37] +prim_inet:enc_value/3 7 0.00 7 [ 1.00] +prim_inet:enum_val/2 6 0.00 7 [ 1.17] +prim_inet:dec_opt_val/1 7 0.00 7 [ 1.00] +prim_inet:dec_value/2 6 0.00 10 [ 1.67] +prim_inet:enc_opt/1 13 0.00 12 [ 0.92] +prim_inet:type_opt/2 19 0.00 33 [ 1.74] +erlang:port_control/3 3 0.00 59 [ 19.67] +tcp_test:read_loop/1 20716370 100.00 12187488 [ 0.59] +``` + +This time our process spent almost no time at all (according to eprof, 0%) +fiddling with the socket opts. Instead it spent all of its time in the +read_loop doing the work we actually want to be doing. + +## So what does this mean? + +I'm by no means advocating never using `{active,once}`. The security concern is +still a completely valid concern and one that `{active,once}` mitigates quite +well. I'm simply pointing out that this mitigation has some fairly serious +performance implications which have the potential to bite you if you're not +careful, especially in cases where a socket is going to be receiving a large +amount of traffic. + +## Meta + +These tests were done using R15B03, but I've done similar ones in R14 and found +similar results. I have not tested R16. + +* \[0] http://learnyousomeerlang.com/buckets-of-sockets +* \[1] http://www.erlang.org/doc/man/gen_tcp.html#examples +* \[2] http://erlycoder.com/25/erlang-tcp-server-tcp-client-sockets-with-gen_tcp + +[0]: http://learnyousomeerlang.com/content +[1]: http://www.metabrew.com/article/a-million-user-comet-application-with-mochiweb-part-1 +[2]: http://www.erlang.org/doc/man/inet.html#setopts-2 +[3]: http://www.erlang.org/doc/man/gen_tcp.html#recv-2 +[4]: http://www.erlang.org/doc/man/eprof.html diff --git a/static/src/_posts/2013-07-11-goplus.md b/static/src/_posts/2013-07-11-goplus.md new file mode 100644 index 0000000..5e63eb2 --- /dev/null +++ b/static/src/_posts/2013-07-11-goplus.md @@ -0,0 +1,78 @@ +--- +title: Go+ +description: >- + A simple proof-of-concept script for doing go dependency management. 
+tags: tech +--- + +Compared to other languages go has some strange behavior regarding its project +root settings. If you import a library called `somelib`, go will look for a +`src/somelib` folder in all of the folders in the `$GOPATH` environment +variable. This works nicely for globally installed packages, but it makes +encapsulating a project with a specific version, or modified version, rather +tedious. Whenever you go to work on this project you'll have to add its path to +your `$GOPATH`, or add the path permanently, which could break other projects +which may use a different version of `somelib`. + +My solution is in the form of a simple script I'm calling go+. go+ will search +in the current directory and all of its parents for a file called `GOPROJROOT`. If +it finds that file in a directory, it prepends that directory's absolute path to +your `$GOPATH` and stops the search. Regardless of whether or not `GOPROJROOT` +was found go+ will pass through all arguments to the actual go call. The +modification to `$GOPATH` will only last the duration of the call. + +As an example, consider the following: +``` +/tmp + /hello + GOPROJROOT + /src + /somelib/somelib.go + /hello.go +``` + +If `hello.go` depends on `somelib`, as long as you run go+ from `/tmp/hello` or +one of its children your project will still compile. + +Here is the source code for go+: + +```bash +#!/bin/sh + +SEARCHING_FOR=GOPROJROOT +ORIG_DIR=$(pwd) + +STOPSEARCH=0 +SEARCH_DIR=$ORIG_DIR +while [ $STOPSEARCH = 0 ]; do + + RES=$( find $SEARCH_DIR -maxdepth 1 -type f -name $SEARCHING_FOR | \ + grep -P "$SEARCHING_FOR$" | \ + head -n1 ) + + if [ "$RES" = "" ]; then + if [ "$SEARCH_DIR" = "/" ]; then + STOPSEARCH=1 + fi + cd .. + SEARCH_DIR=$(pwd) + else + export GOPATH=$SEARCH_DIR:$GOPATH + STOPSEARCH=1 + fi +done + +cd "$ORIG_DIR" +exec go $@ +``` + +## UPDATE: Goat + +I'm leaving this post for posterity, but go+ has some serious flaws in it.
For +one, it doesn't allow for specifying the version of a dependency you want to +use. To this end, I wrote [goat][0] which does all the things go+ does, plus +real dependency management, PLUS it is built in a way that if you've been +following go's best-practices for code organization you shouldn't have to change +any of your existing code AT ALL. It's cool, check it out. + +[0]: http://github.com/mediocregopher/goat diff --git a/static/src/_posts/2013-10-08-generations.md b/static/src/_posts/2013-10-08-generations.md new file mode 100644 index 0000000..39e7cac --- /dev/null +++ b/static/src/_posts/2013-10-08-generations.md @@ -0,0 +1,101 @@ +--- +title: Generations +description: >- + A simple file distribution strategy for very large scale, high-availability + file-services. +tags: tech +--- + +## The problem + +At [cryptic.io][cryptic] we plan on having millions of different +files, any of which could be arbitrarily chosen to be served at any given time. +These files are uploaded by users at arbitrary times. + +Scaling such a system is no easy task. The solution I've seen implemented in the +past involves shuffling files around on a nearly constant basis, making sure +that files which are more "popular" are on fast drives, while at the same time +making sure that no drives are at capacity and at the same time that all files, +even newly uploaded ones, are stored redundantly. + +The problem with this solution is one of coordination. At any given moment the +app needs to be able to "find" a file so it can give the client a link to +download the file from one of the servers that it's on. Fulfilling this simple +requirement means that all datastores/caches where information about where a +file lives need to be up-to-date at all times, and even then there are +race-conditions and network failures to contend with, while at all times the +requirements of the app evolve and change.
+ +## A simpler solution + +Let's say you want all files which get uploaded to be replicated in triplicate +in some capacity. You buy three identical hard-disks, and put each on a separate +server. As files get uploaded by clients, each file gets put on each drive +immediately. When the drives are filled (which should be at around the same +time), you stop uploading to them. + +That was generation 0. + +You buy three more drives, and start putting all files on them instead. This is +going to be generation 1. Repeat until you run out of money. + +That's it. + +### That's it? + +It seems simple and obvious, and maybe it's the standard thing which is done, +but as far as I can tell no-one has written about it (though I'm probably not +searching for the right thing, let me know if this is the case!). + +### Advantages + +* It's so simple to implement, you could probably do it in a day if you're +starting a project from scratch + +* By definition of the scheme all files are replicated in multiple places. + +* Minimal information about where a file "is" needs to be stored. When a file is +uploaded all that's needed is to know what generation it is in, and then what +nodes/drives are in that generation. If the file's name is generated +server-side, then the file's generation could be *part* of its name, making +lookup even faster. + +* Drives don't need to "know" about each other. What I mean by this is that +whatever is running as the receive point for file-uploads on each drive doesn't +have to coordinate with its siblings running on the other drives in the +generation. In fact it doesn't need to coordinate with anyone. You could +literally rsync files onto your drives if you wanted to. I would recommend using +[marlin][0] though :) + +* Scaling is easy. When you run out of space you can simply start a new +generation. If you don't like playing that close to the chest there's nothing to +say you can't have two generations active at the same time. + +* Upgrading is easy. 
As long as a generation is not marked-for-upload, you can +easily copy all files in the generation into a new set of bigger, badder drives, +add those drives into the generation in your code, remove the old ones, then +mark the generation as uploadable again. + +* Distribution is easy. You just copy a generation's files onto a new drive in +Europe or wherever you're getting an uptick in traffic from and you're good to +go. + +* Management is easy. It's trivial to find out how many times a file has been +replicated, or how many countries it's in, or what hardware it's being served +from (given you have easy access to information about specific drives). + +### Caveats + +The big caveat here is that this is just an idea. It has NOT been tested in +production. But we have enough faith in it that we're going to give it a shot at +[cryptic.io][cryptic]. I'll keep this page updated. + +The second caveat is that this scheme does not inherently support caching. If a +file suddenly becomes super popular the world over your hard-disks might not be +able to keep up, and it's probably not feasible to have an FIO drive in *every* +generation. I think that [groupcache][1] may be the answer to this problem, +assuming your files are reasonably small, but again I haven't tested it yet. + +[cryptic]: https://cryptic.io +[0]: https://github.com/cryptic-io/marlin +[1]: https://github.com/golang/groupcache diff --git a/static/src/_posts/2013-10-25-namecoind-ssl.md b/static/src/_posts/2013-10-25-namecoind-ssl.md new file mode 100644 index 0000000..deded79 --- /dev/null +++ b/static/src/_posts/2013-10-25-namecoind-ssl.md @@ -0,0 +1,249 @@ +--- +title: Namecoin, A Replacement For SSL +description: >- + If we use the namecoin chain as a DNS service we get security almost for + free, along with lots of other benefits. 
+tags: tech crypto +--- + +At [cryptic.io][cryptic] we are creating a client-side, in-browser encryption +system where a user can upload their already encrypted content to our storage +system and be 100% confident that their data can never be decrypted by anyone +but them. + +One of the main problems with this approach is that the client has to be sure +that the code that's being run in their browser is the correct code; that is, +that they aren't the subject of a man-in-the-middle attack where an attacker is +turning our strong encryption into weak encryption that they could later break. + +A component of our current solution is to deliver the site's javascript (and all +other assets, for that matter) using SSL encryption. This protects the files +from tampering in-between leaving our servers and being received by the client. +Unfortunately, SSL isn't 100% foolproof. This post aims to show why SSL is +faulty, and propose a solution. + +## SSL + +SSL is the mechanism by which web-browsers establish an encrypted connection to +web-servers. The goal of this connection is that only the destination +web-browser and the server know what data is passing between them. Anyone spying +on the connection would only see gibberish. To do this a secret key is first +established between the client and the server, and used to encrypt/decrypt all +data. As long as no-one but those parties knows that key, that data will never +be decrypted by anyone else. + +SSL is what's used to establish that secret key on a per-session basis, so that +a key isn't ever re-used and so only the client and the server know it. + +### Public-Private Key Cryptography + +SSL is based around public-private key cryptography. In a public-private key +system, you have both a private key and a public key which is generated from it. The +public key can be given to anyone, but the private key must remain hidden.
There +are two main uses for these two keys: + +* Someone can encrypt a message with your public key, and only you (with the + private key) can decrypt it. + +* You can sign a message with your private key, and anyone with your public key + can verify that it was you and not someone else who signed it. + +These are both extremely useful functions, not just for internet traffic but for +any kind of communication form. Unfortunately, there remains a fundamental flaw. +At some point you must give your public key to the other person in an insecure +way. If an attacker was to intercept your message containing your public key and +swap it for their own, then all future communications could be compromised. That +attacker could create messages the other person would think are from you, and +the other person would encrypt messages meant for you but which would be +decrypt-able by the attacker. + +### How does SSL work? + +SSL is at its heart a public-private key system, but its aim is to be more +secure against the attack described above. + +SSL uses a trust-chain to verify that a public key is the intended one. Your web +browser has a built-in set of public keys, called the root certificates, that it +implicitly trusts. These root certificates are managed by a small number of +companies designated by some agency who decides on these things. + +When you receive a server's SSL certificate (its public key) that certificate +will be signed by a root certificate. You can verify that signature since you +have the root certificate's public key built into your browser. If the signature +checks out then you know a certificate authority trusts the public key the site +gave you, which means you can trust it too. + +There's a bit (a lot!) more to SSL than this, but this is enough to understand +the fundamental problems with it. + +### How SSL doesn't work + +SSL has a few glaring problems. One, it implies we trust the companies holding +the root certificates to not be compromised. 
If some malicious agency was to get +ahold of a root certificate they could listen in on any connection on the +internet by swapping a site's real certificate with one they generate on the +fly. They could trivially steal any data we send on the internet. + +The second problem is that it's expensive. Really expensive. If you're running a +business you'll have to shell out about $200 a year to keep your SSL certificate +signed (those signatures have an expiration date attached). Since there's very +few root authorities there's an effective monopoly on signatures, and there's +nothing we can do about it. For 200 bucks I know most people simply say "no +thanks" and go unencrypted. The solution is creating a bigger problem. + +## Bitcoins + +Time to switch gears, and propose a solution to the above issues: namecoins. I'm +going to first talk about what namecoins are, how they work, and why we need +them. To start with, namecoins are based on bitcoins. + +If you haven't yet checked out bitcoins, [I highly encourage you to do +so][bitcoins]. They're awesome, and I think they have a chance of really +changing the way we think of and use money in the future. At the moment they're +still a bit of a novelty in the tech realm, but they're growing in popularity. + +The rest of this post assumes you know more or less what bitcoins are, and how +they work. + +## Namecoins + +Few people actually know about bitcoins. Even fewer know that there's other +crypto-currencies besides bitcoins. Basically, developers of these alternative +currencies (altcoins, in the parlance of our times) took the original bitcoin +source code and modified it to produce a new, separate blockchain from the +original bitcoin one. The altcoins are based on the same idea as bitcoins +(namely, a chain of blocks representing all the transactions ever made), but +have slightly different characteristics. + +One of these altcoins is called namecoin.
Where other altcoins aim to be digital +currencies, and used as such (like bitcoins), namecoin has a different goal. The +point of namecoin is to create a global, distributed, secure key-value store. +You spend namecoins to claim arbitrary keys (once you've claimed it, you own it +for a set period of time) and to give those keys arbitrary values. Anyone else +with namecoind running can see these values. + +### Why use it? + +A blockchain based on a digital currency seems like a weird idea at first. I +know when I first read about it I was less than thrilled. How is this better +than a DHT? It's a key-value store, why is there a currency involved? + +#### DHT + +DHT stands for Distributed Hash-Table. I'm not going to go too deep into how they +work, but suffice it to say that they are essentially a distributed key-value +store. Like namecoin. The difference is in the operation. DHTs operate by +spreading and replicating keys and their values across nodes in a P2P mesh. They +have [lots of issues][dht] as far as security goes, the main one being that it's +fairly easy for an attacker to forge the value for a given key, and very +difficult to stop them from doing so or even to detect that it's happened. + +Namecoins don't have this problem. To forge a particular key an attacker would +essentially have to create a new blockchain from a certain point in the existing +chain, and then replicate all the work put into the existing chain into that new +compromised one so that the new one is longer and other clients in the network +will accept it. This is extremely non-trivial. + +#### Why a currency? + +To answer why a currency needs to be involved, we need to first look at how +bitcoin/namecoin work. When you take an action (send someone money, set a value +to a key) that action gets broadcast to the network. Nodes on the network +collect these actions into a block, which is just a collection of multiple +actions.
Their goal is to find a hash of this new block, combined with some data +from the top-most block in the existing chain, combined with some arbitrary +data, such that the first n characters in the resulting hash are zeros (with n +constantly increasing). When they find one they broadcast it out on the network. +Assuming the block is legitimate they receive some number of coins as +compensation. + +That compensation is what keeps a blockchain based currency going. If there +were no compensation there would be no reason to mine except out of goodwill, so +far fewer people would do it. Since the chain can be compromised if a malicious +group has more computing power than all legitimate miners combined, having few +legitimate miners is a serious problem. + +In the case of namecoins, there's even more reason to involve a currency. Since +you have to spend money to make changes to the chain there's a disincentive for +attackers (read: idiots) to spam the chain with frivolous changes to keys. + +#### Why a *new* currency? + +I'll admit, it's a bit annoying to see all these altcoins popping up. I'm sure +many of them have some solid ideas backing them, but it also makes things +confusing for newcomers and dilutes the "market" of cryptocoin users; the more +users a particular chain has, the stronger it is. If we have many chains, all we +have are a bunch of weak chains. + +The exception to this gripe, for me, is namecoin. When I was first thinking +about this problem my instinct was to just use the existing bitcoin blockchain +as a key-value storage. However, the maintainers of the bitcoin clients +(who are, in effect, the maintainers of the chain) don't want the bitcoin +blockchain polluted with non-commerce related data. At first I disagreed; it's a +P2P network, no-one gets to say what I can or can't use the chain for! And +that's true. But things work out better for everyone involved if there's two +chains. + +Bitcoin is a currency. 
Namecoin is a key-value store (with a currency as its +driving force). Those are two completely different use-cases, with two +completely different usage characteristics. And we don't know yet what those +characteristics are, or if they'll change. If the chain-maintainers have to deal +with a mingled chain we could very well be tying their hands with regards to +what they can or can't change with regards to the behavior of the chain, since +improving performance for one use-case may hurt the performance of the other. +With two separate chains the maintainers of each are free to do what they see +fit to keep their respective chains operating as smoothly as possible. +Additionally, if for some reason bitcoins fall by the wayside, namecoin will +still have a shot at continuing operation since it isn't tied to the former. +Tldr: separation of concerns. + +## Namecoin as an alternative to SSL + +And now to tie it all together. + +There are already a number of proposed formats for standardizing how we store +data on the namecoin chain so that we can start building tools around it. I'm +not hugely concerned with the particulars of those standards, only that we can, +in some way, standardize on attaching a public key (or a fingerprint of one) to +some key on the namecoin blockchain. When you visit a website, the server +would then send both its public key and the namecoin chain key to be checked +against to the browser, and the browser would validate that the public key it +received is the same as the one on the namecoin chain. + +The main issue with this is that it requires another round-trip when visiting a +website: One for DNS, and one to check the namecoin chain. And where would this +chain even be hosted? + +My proposition is there would exist a number of publicly available servers +hosting a namecoind process that anyone in the world could send requests for +values on the chain. Browsers could then be made with a couple of these +hardwired in.
ISPs could also run their own copies at various points in their +network to improve response-rates and decrease load on the globally public +servers. Furthermore, the paranoid could host their own and be absolutely sure +that the data they're receiving is valid. + +If the above scheme sounds a lot like what we currently use for DNS, that's +because it is. In fact, one of namecoin's major goals is that it be used as a +replacement for DNS, and most of the talk around it is focused on this subject. +DNS has many of the same problems as SSL, namely single-point-of-failure and +that it's run by a centralized agency that we have to pay arbitrarily high fees +to. By switching our DNS and SSL infrastructure to use namecoin we could kill +two horribly annoying, monopolized, expensive birds with a single stone. + +That's it. If we use the namecoin chain as a DNS service we get security almost +for free, along with lots of other benefits. To make this happen we need +cooperation from browser makers, and to standardize on a simple way of +retrieving DNS information from the chain that the browsers can use. The +protocol doesn't need to be very complex, I think HTTP/REST should suffice, +since the meat of the data will be embedded in the JSON value on the namecoin +chain. + +If you want to contribute or learn more please check out [namecoin][nmc] and +specifically the [d namespace proposal][dns] for it. 
+ +[cryptic]: http://cryptic.io +[bitcoins]: http://vimeo.com/63502573 +[dht]: http://www.globule.org/publi/SDST_acmcs2009.pdf +[nsa]: https://www.schneier.com/blog/archives/2013/09/new_nsa_leak_sh.html +[nmc]: http://dot-bit.org/Main_Page +[dns]: http://dot-bit.org/Namespace:Domain_names_v2.0 diff --git a/static/src/_posts/2014-01-11-diamond-square.md b/static/src/_posts/2014-01-11-diamond-square.md new file mode 100644 index 0000000..528c953 --- /dev/null +++ b/static/src/_posts/2014-01-11-diamond-square.md @@ -0,0 +1,495 @@ +--- +title: Diamond Square +description: >- + Tackling the problem of semi-realistic looking terrain generation in + clojure. +updated: 2018-09-06 +tags: tech art +--- + +![terrain][terrain] + +I recently started looking into the diamond-square algorithm (you can find a +great article on it [here][diamondsquare]). The following is a short-ish +walkthrough of how I tackled the problem in clojure and the results. You can +find the [leiningen][lein] repo [here][repo] and follow along within that, or +simply read the code below to get an idea. + +Also, Marco ported my code into clojurescript, so you can get random terrain +in your browser. [Check it out!][marco] + +```clojure +(ns diamond-square.core) + +; == The Goal == +; Create a fractal terrain generator using clojure + +; == The Algorithm == +; Diamond-Square. We start with a grid of points, each with a height of 0. +; +; 1. Take each corner point of the square, average the heights, and assign that +; to be the height of the midpoint of the square. Apply some random error to +; the midpoint. +; +; 2. Creating a line from the midpoint to each corner we get four half-diamonds. +; Average the heights of the points (with some random error) and assign the +; heights to the midpoints of the diamonds. +; +; 3. We now have four square sections, start at 1 for each of them (with +; decreasing amount of error for each iteration). 
+; +; This picture explains it better than I can: +; https://blog.mediocregopher.com/img/diamond-square/dsalg.png +; (http://nbickford.wordpress.com/2012/12/21/creating-fake-landscapes/dsalg/) +; +; == The Strategy == +; We begin with a vector of vectors of numbers, and iterate over it, filling in +; spots as they become available. Our grid will have the top-left being (0,0), +; y pointing down and x going to the right. The outermost vector +; indicates row number (y) and the inner vectors indicate the column number (x) +; +; = Utility = +; First we create some utility functions for dealing with vectors of vectors. + +(defn print-m + "Prints a grid in a nice way" + [m] + (doseq [n m] + (println n))) + +(defn get-m + "Gets a value at the given x,y coordinate of the grid, with [0,0] being in the + top left" + [m x y] + ((m y) x)) + +(defn set-m + "Sets a value at the given x,y coordinat of the grid, with [0,0] being in the + top left" + [m x y v] + (assoc m y + (assoc (m y) x v))) + +(defn add-m + "Like set-m, but adds the given value to the current on instead of overwriting + it" + [m x y v] + (set-m m x y + (+ (get-m m x y) v))) + +(defn avg + "Returns the truncated average of all the given arguments" + [& l] + (int (/ (reduce + l) (count l)))) + +; = Grid size = +; Since we're starting with a blank grid we need to find out what sizes the +; grids can be. For convenience the size (height and width) should be odd, so we +; easily get a midpoint. And on each iteration we'll be halving the grid, so +; whenever we do that the two resultant grids should be odd and halvable as +; well, and so on. +; +; The algorithm that fits this is size = 2^n + 1, where 1 <= n. For the rest of +; this guide I'll be referring to n as the "degree" of the grid. + + +(def exp2-pre-compute + (vec (map #(int (Math/pow 2 %)) (range 31)))) + +(defn exp2 + "Returns 2^n as an integer.
Uses pre-computed values since we end up doing + this so much" + [n] + (exp2-pre-compute n)) + +(def grid-sizes + (vec (map #(inc (exp2 %)) (range 1 31)))) + +(defn grid-size [degree] + (inc (exp2 degree))) + +; Available grid heights/widths are as follows: +;[3 5 9 17 33 65 129 257 513 1025 2049 4097 8193 16385 32769 65537 131073 +;262145 524289 1048577 2097153 4194305 8388609 16777217 33554433 67108865 +;134217729 268435457 536870913 1073741825]) + +(defn blank-grid + "Generates a grid of the given degree, filled in with zeros" + [degree] + (let [gsize (grid-size degree)] + (vec (repeat gsize + (vec (repeat gsize 0)))))) + +(comment + (print-m (blank-grid 3)) +) + +; = Coordinate Pattern (The Tricky Part) = +; We now have to figure out which coordinates need to be filled in on each pass. +; A pass is defined as a square step followed by a diamond step. The next pass +; will be the square/diamond steps on all the smaller squares generated in the +; pass. It works out that the number of passes required to fill in the grid is +; the same as the degree of the grid, where the first pass is 1. +; +; So we can easily find patterns in the coordinates for a given degree/pass, +; I've laid out below all the coordinates for each pass for a 3rd degree grid +; (which is 9x9). + +; Degree 3 Pass 1 Square +; [. . . . . . . . .] +; [. . . . . . . . .] +; [. . . . . . . . .] +; [. . . . . . . . .] +; [. . . . 1 . . . .] (4,4) +; [. . . . . . . . .] +; [. . . . . . . . .] +; [. . . . . . . . .] +; [. . . . . . . . .] + +; Degree 3 Pass 1 Diamond +; [. . . . 2 . . . .] (4,0) +; [. . . . . . . . .] +; [. . . . . . . . .] +; [. . . . . . . . .] +; [2 . . . . . . . 2] (0,4) (8,4) +; [. . . . . . . . .] +; [. . . . . . . . .] +; [. . . . . . . . .] +; [. . . . 2 . . . .] (4,8) + +; Degree 3 Pass 2 Square +; [. . . . . . . . .] +; [. . . . . . . . .] +; [. . 3 . . . 3 . .] (2,2) (6,2) +; [. . . . . . . . .] +; [. . . . . . . . .] +; [. . . . . . . . .] +; [. . 3 . . . 3 . .]
(2,6) (6,6) +; [. . . . . . . . .] +; [. . . . . . . . .] + +; Degree 3 Pass 2 Diamond +; [. . 4 . . . 4 . .] (2,0) (6,0) +; [. . . . . . . . .] +; [4 . . . 4 . . . 4] (0,2) (4,2) (8,2) +; [. . . . . . . . .] +; [. . 4 . . . 4 . .] (2,4) (6,4) +; [. . . . . . . . .] +; [4 . . . 4 . . . 4] (0,6) (4,6) (8,6) +; [. . . . . . . . .] +; [. . 4 . . . 4 . .] (2,8) (6,8) + +; Degree 3 Pass 3 Square +; [. . . . . . . . .] +; [. 5 . 5 . 5 . 5 .] (1,1) (3,1) (5,1) (7,1) +; [. . . . . . . . .] +; [. 5 . 5 . 5 . 5 .] (1,3) (3,3) (5,3) (7,3) +; [. . . . . . . . .] +; [. 5 . 5 . 5 . 5 .] (1,5) (3,5) (5,5) (7,5) +; [. . . . . . . . .] +; [. 5 . 5 . 5 . 5 .] (1,7) (3,7) (5,7) (7,7) +; [. . . . . . . . .] + +; Degree 3 Pass 3 Diamond +; [. 6 . 6 . 6 . 6 .] (1,0) (3,0) (5,0) (7,0) +; [6 . 6 . 6 . 6 . 6] (0,1) (2,1) (4,1) (6,1) (8,1) +; [. 6 . 6 . 6 . 6 .] (1,2) (3,2) (5,2) (7,2) +; [6 . 6 . 6 . 6 . 6] (0,3) (2,3) (4,3) (6,3) (8,3) +; [. 6 . 6 . 6 . 6 .] (1,4) (3,4) (5,4) (7,4) +; [6 . 6 . 6 . 6 . 6] (0,5) (2,5) (4,5) (6,5) (8,5) +; [. 6 . 6 . 6 . 6 .] (1,6) (3,6) (5,6) (7,6) +; [6 . 6 . 6 . 6 . 6] (0,7) (2,7) (4,7) (6,7) (8,7) +; [. 6 . 6 . 6 . 6 .] (1,8) (3,8) (5,8) (7,8) +; +; I make two different functions, one to give the coordinates for the square +; portion of each pass and one for the diamond portion of each pass. To find the +; actual patterns it was useful to first look only at the pattern in the +; y-coordinates, and figure out how that translated into the pattern for the +; x-coordinates.
+ +(defn grid-square-coords + "Given a grid degree and pass number, returns all the coordinates which need + to be computed for the square step of that pass" + [degree pass] + (let [gsize (grid-size degree) + start (exp2 (- degree pass)) + interval (* 2 start) + coords (map #(+ start (* interval %)) + (range (exp2 (dec pass))))] + (mapcat (fn [y] + (map #(vector % y) coords)) + coords))) +; +; (grid-square-coords 3 2) +; => ([2 2] [6 2] [2 6] [6 6]) + +(defn grid-diamond-coords + "Given a grid degree and a pass number, returns all the coordinates which need + to be computed for the diamond step of that pass" + [degree pass] + (let [gsize (grid-size degree) + interval (exp2 (- degree pass)) + num-coords (grid-size pass) + coords (map #(* interval %) (range 0 num-coords))] + (mapcat (fn [y] + (if (even? (/ y interval)) + (map #(vector % y) (take-nth 2 (drop 1 coords))) + (map #(vector % y) (take-nth 2 coords)))) + coords))) + +; (grid-diamond-coords 3 2) +; => ([2 0] [6 0] [0 2] [4 2] [8 2] [2 4] [6 4] [0 6] [4 6] [8 6] [2 8] [6 8]) + +; = Height Generation = +; We now work on functions which, given a coordinate, will return what value +; coordinate will have. + +(defn avg-points + "Given a grid and an arbitrary number of points (of the form [x y]) returns + the average of all the given points that are on the map. Any points which are + off the map are ignored" + [m & coords] + (let [grid-size (count m)] + (apply avg + (map #(apply get-m m %) + (filter + (fn [[x y]] + (and (< -1 x) (> grid-size x) + (< -1 y) (> grid-size y))) + coords))))) + +(defn error + "Returns a number between -e and e, inclusive" + [e] + (- (rand-int (inc (* 2 e))) e)) + +; The next function is a little weird. It primarily takes in a point, then +; figures out the distance from that point to the points we'll take the average +; of. The locf (locator function) is used to return back the actual points to +; use. 
For the square portion it'll be the points diagonal from the given one, +; for the diamond portion it'll be the points to the top/bottom/left/right from +; the given one. +; +; Once it has those points, it finds the average and applies the error. The +; error function is nothing more than a number between -interval and +interval, +; where interval is the distance between the given point and one of the averaged +; points. It is important that the error decreases the more passes you do, which +; is why the interval is used. +; +; The error function is what should be messed with primarily if you want to +; change what kind of terrain you generate (a giant mountain instead of +; hills/valleys, for example). The one we use is uniform for all intervals, so +; it generates a uniform terrain. + +(defn- grid-fill-point + [locf m degree pass x y] + (let [interval (exp2 (- degree pass)) + leftx (- x interval) + rightx (+ x interval) + upy (- y interval) + downy (+ y interval) + v (apply avg-points m + (locf x y leftx rightx upy downy))] + (add-m m x y (+ v (error interval))))) + +(def grid-fill-point-square + "Given a grid, the grid's degree, the current pass number, and a point on the + grid, fills in that point with the average (plus some error) of the + appropriate corner points, and returns the resultant grid" + (partial grid-fill-point + (fn [_ _ leftx rightx upy downy] + [[leftx upy] + [rightx upy] + [leftx downy] + [rightx downy]]))) + +(def grid-fill-point-diamond + "Given a grid, the grid's degree, the current pass number, and a point on the + grid, fills in that point with the average (plus some error) of the + appropriate edge points, and returns the resultant grid" + (partial grid-fill-point + (fn [x y leftx rightx upy downy] + [[leftx y] + [rightx y] + [x upy] + [x downy]]))) + +; = Filling in the Grid = +; We finally compose the functions we've been creating to fill in the entire +; grid + +(defn- grid-fill-point-passes + "Given a grid, a function to fill in 
coordinates, and a function to generate + those coordinates, fills in all coordinates for a given pass, returning the + resultant grid" + [m fill-f coord-f degree pass] + (reduce + (fn [macc [x y]] (fill-f macc degree pass x y)) + m + (coord-f degree pass))) + +(defn grid-pass + "Given a grid and a pass number, does the square then the diamond portion of + the pass" + [m degree pass] + (-> m + (grid-fill-point-passes + grid-fill-point-square grid-square-coords degree pass) + (grid-fill-point-passes + grid-fill-point-diamond grid-diamond-coords degree pass))) + +; The most important function in this guide, does all the work +(defn terrain + "Given a grid degree, generates a uniformly random terrain on a grid of that + degree" + ([degree] + (terrain (blank-grid degree) degree)) + ([m degree] + (reduce + #(grid-pass %1 degree %2) + m + (range 1 (inc degree))))) + +(comment + (print-m + (terrain 5)) +) + +; == The Results == +; We now have a generated terrain, probably. We should check it. First we'll +; create an ASCII representation. But to do that we'll need some utility +; functions. + +(defn max-terrain-height + "Returns the maximum height found in the given terrain grid" + [m] + (reduce max + (map #(reduce max %) m))) + +(defn min-terrain-height + "Returns the minimum height found in the given terrain grid" + [m] + (reduce min + (map #(reduce min %) m))) + +(defn norm + "Given x in the range (A,B), normalizes it into the range (0,new-height)" + [A B new-height x] + (int (/ (* (- x A) new-height) (- B A)))) + +(defn normalize-terrain + "Given a terrain map and a number of \"steps\", normalizes the terrain so all + heights in it are in the range (0,steps)" + [m steps] + (let [max-height (max-terrain-height m) + min-height (min-terrain-height m) + norm-f (partial norm min-height max-height steps)] + (vec (map #(vec (map norm-f %)) m)))) + +; We now define which ASCII characters we want to use for which heights. 
The +; vector starts with the character for the lowest height and ends with the +; character for the heighest height. + +(def tiles + [\~ \~ \" \" \x \x \X \$ \% \# \@]) + +(defn tile-terrain + "Given a terrain map, converts it into an ASCII tile map" + [m] + (vec (map #(vec (map tiles %)) + (normalize-terrain m (dec (count tiles)))))) + +(comment + (print-m + (tile-terrain + (terrain 5))) + +; [~ ~ " " x x x X % $ $ $ X X X X X X $ x x x X X X x x x x " " " ~] +; [" ~ " " x x X X $ $ $ X X X X X X X X X X X X X X x x x x " " " "] +; [" " " x x x X X % $ % $ % $ $ X X X X $ $ $ X X X X x x x x " " "] +; [" " " x x X $ % % % % % $ % $ $ X X $ $ $ $ X X x x x x x x " " x] +; [" x x x x X $ $ # % % % % % % $ X $ X X % $ % X X x x x x x x x x] +; [x x x X $ $ $ % % % % % $ % $ $ $ % % $ $ $ $ X X x x x x x x x x] +; [X X X $ % $ % % # % % $ $ % % % % $ % $ $ X $ X $ X X x x x X x x] +; [$ $ X $ $ % $ % % % % $ $ $ % # % % % X X X $ $ $ X X X x x x x x] +; [% X X % % $ % % % $ % $ % % % # @ % $ $ X $ X X $ X x X X x x x x] +; [$ $ % % $ $ % % $ $ X $ $ % % % % $ $ X $ $ X X X X X X x x x x x] +; [% % % X $ $ % $ $ X X $ $ $ $ % % $ $ X X X $ X X X x x X x x X X] +; [$ $ $ X $ $ X $ X X X $ $ $ $ % $ $ $ $ $ X $ X x X X X X X x X X] +; [$ $ $ $ X X $ X X X X X $ % % % % % $ X $ $ $ X x X X X $ X X $ $] +; [X $ $ $ $ $ X X X X X X X % $ % $ $ $ X X X X X x x X X x X X $ $] +; [$ $ X X $ X X x X $ $ X X $ % X X X X X X X X X x X X x x X X X X] +; [$ $ X X X X X X X $ $ $ $ $ X $ X X X X X X X x x x x x x x X X X] +; [% % % $ $ X $ X % X X X % $ $ X X X X X X x x x x x x x x x X X $] +; [$ % % $ $ $ X X $ $ $ $ $ $ X X X X x X x x x x " x x x " x x x x] +; [$ X % $ $ $ $ $ X X X X X $ $ X X X X X X x x " " " " " " " " x x] +; [$ X $ $ % % $ X X X $ X X X x x X X x x x x x " " " " " ~ " " " "] +; [$ $ X X % $ % X X X X X X X X x x X X X x x x " " " " " " ~ " " "] +; [$ $ X $ % $ $ X X X X X X x x x x x x x x x " " " " " " " " " ~ ~] +; [$ $ $ $ $ X X $ X X X X X x x x x x x 
x x " " " " " " " ~ " " " ~] +; [$ % X X $ $ $ $ X X X X x x x x x x x x x x " " " " ~ " " ~ " " ~] +; [% $ $ X $ X $ X $ X $ X x x x x x x x x x x " " " " ~ ~ ~ " ~ " ~] +; [$ X X X X $ $ $ $ $ X x x x x x x x x x x " " " " ~ ~ ~ ~ ~ ~ ~ ~] +; [X x X X x X X X X X X X X x x x x x x x x x " " " ~ ~ " " ~ ~ ~ ~] +; [x x x x x x X x X X x X X X x x x x x x x " x " " " " " ~ ~ ~ ~ ~] +; [x x x x x x x x X X X X $ X X x X x x x x x x x x " ~ ~ ~ ~ ~ ~ ~] +; [" x x x x x X x X X X X X X X X X x x x x x x " " " " ~ ~ ~ ~ ~ ~] +; [" " " x x x X X X X $ $ $ X X X X X X x x x x x x x x " " ~ ~ ~ ~] +; [" " " " x x x X X X X X $ $ X X x X X x x x x x x x " " " " " ~ ~] +; [~ " " x x x x X $ X $ X $ $ X x X x x x x x x x x x x x x " " " ~] +) + +; = Pictures! = +; ASCII is cool, but pictures are better. First we import some java libraries +; that we'll need, then define the colors for each level just like we did tiles +; for the ascii representation. + +(import + 'java.awt.image.BufferedImage + 'javax.imageio.ImageIO + 'java.io.File) + +(def colors + [0x1437AD 0x04859D 0x007D1C 0x007D1C 0x24913C + 0x00C12B 0x38E05D 0xA3A3A4 0x757575 0xFFFFFF]) + +; Finally we reduce over a BufferedImage instance to output every tile as a +; single pixel on it. + +(defn img-terrain + "Given a terrain map and a file name, outputs a png representation of the + terrain map to that file" + [m file] + (let [img (BufferedImage. (count m) (count m) BufferedImage/TYPE_INT_RGB)] + (reduce + (fn [rown row] + (reduce + (fn [coln tile] + (.setRGB img coln rown (colors tile)) + (inc coln)) + 0 row) + (inc rown)) + 0 (normalize-terrain m (dec (count colors)))) + (ImageIO/write img "png" (File. file)))) + +(comment + (img-terrain + (terrain 10) + "resources/terrain.png") + + ; https://blog.mediocregopher.com/img/diamond-square/terrain.png +) + +; == Conclusion == +; There's still a lot of work to be done. 
The algorithm starts taking a +; non-trivial amount of time around the 10th degree, which is only a 1025x1025px +; image. I need to profile the code and find out where the bottlenecks are. It's +; possible re-organizing the code to use pmaps instead of reduces in some places +; could help. +``` + +[marco]: http://marcopolo.io/diamond-square/ +[terrain]: /img/diamond-square/terrain.png +[diamondsquare]: http://www.gameprogrammer.com/fractal.html +[lein]: https://github.com/technomancy/leiningen +[repo]: https://github.com/mediocregopher/diamond-square diff --git a/static/src/_posts/2014-10-29-erlang-pitfalls.md b/static/src/_posts/2014-10-29-erlang-pitfalls.md new file mode 100644 index 0000000..7358430 --- /dev/null +++ b/static/src/_posts/2014-10-29-erlang-pitfalls.md @@ -0,0 +1,193 @@ +--- +title: Erlang Pitfalls +description: >- + Common pitfalls that people may run into when designing and writing + large-scale erlang applications. +tags: tech +--- + +I've been involved with a large-ish scale erlang project at Grooveshark since +sometime around 2011. I started this project knowing absolutely nothing about +erlang, but now I feel I have accumulated enough knowledge over time that I could +conceivably give some back. Specifically, common pitfalls that people may run +into when designing and writing a large-scale erlang application. Some of these +may show up when searching for them, but some of them you may not even know you +need to search for. + +## now() vs timestamp() + +The canonical way of getting the current timestamp in erlang is to use +`erlang:now()`. This works great at small loads, but if you find your +application slowing down greatly at highly parallel loads and you're calling +`erlang:now()` a lot, it may be the culprit. + +A property of this method you may not realize is that it is monotonically +increasing, meaning even if two processes call it at the *exact* same time they +will both receive different output.
This is done through some locking on the +low-level, as well as a bit of math to balance out the time getting out of sync +in the scenario. + +There are situations where fetching always unique timestamps is useful, such as +seeding RNGs and generating unique identifiers for things, but usually when +people fetch a timestamp they just want a timestamp. For these cases, +`os:timestamp()` can be used. It is not blocked by any locks, it simply returns +the time. + +## The rpc module is slow + +The built-in `rpc` module is slower than you'd think. This mostly stems from it +doing a lot of extra work for every `call` and `cast` that you do, ensuring that +certain conditions are accounted for. If, however, it's sufficient for the +calling side to know that a call timed-out on them and not worry about it any +further you may benefit from simply writing your own rpc module. Alternatively, +use [one which already exists](https://github.com/cloudant/rexi). + +## Don't send anonymous functions between nodes + +One of erlang's niceties is transparent message sending between two physical +erlang nodes. Once nodes are connected, a process on one can send any message to +a process on the other exactly as if they existed on the same node. This is fine +for many data-types, but for anonymous functions it should be avoided. + +For example: + +```erlang +RemotePid ! {fn, fun(I) -> I + 1 end}. +``` + +Would be better written as + +```erlang +incr(I) -> + I + 1. + +RemotePid ! {fn, ?MODULE, incr}. +``` + +and then using an `apply` on the RemotePid to actually execute the function. + +This is because hot-swapping code messes with anonymous functions quite a bit. +Erlang isn't actually sending a function definition across the wire; it's simply +sending a reference to a function. If you've changed the code within the +anonymous function on a node, that reference changes.
The sending node is +sending a reference to a function which may not exist anymore on the receiving +node, and you'll get a weird error which Google doesn't return many results for. + +Alternatively, if you simply send atoms across the wire and use `apply` on the +other side, only atoms are sent and the two nodes involved can have totally +different ideas of what the function itself does without any problems. + +## Hot-swapping code is a convenience, not a crutch + +Hot swapping code is the bees-knees. It lets you not have to worry about +rolling-restarts for trivial code changes, and so adds stability to your +cluster. My warning is that you should not rely on it. If your cluster can't +survive a node being restarted for a code change, then it can't survive if that +node fails completely, or fails and comes back up. Design your system pretending +that hot-swapping does not exist, and only once you've done that allow yourself +to use it. + +## GC sometimes needs a boost + +Erlang garbage collection (GC) acts on a per-erlang-process basis, meaning that +each process decides on its own to garbage collect itself. This is nice because +it means stop-the-world isn't a problem, but it does have some interesting +effects. + +We had a problem with our node memory graphs looking like an upwards facing +line, instead of a nice sinusoid relative to the number of connections during +the day. We couldn't find a memory leak *anywhere*, and so started profiling. We +found that the memory seemed to be comprised of mostly binary data in process +heaps. On a hunch my coworker Mike Cugini (who gets all the credit for this) ran +the following on a node: + +```erlang +lists:foreach(fun erlang:garbage_collect/1, erlang:processes()). +``` + +and saw memory drop in a huge way. We made that code run every 10 minutes or so +and suddenly our memory problem went away.
+ +The problem is that we had a lot of processes which individually didn't have +much heap data, but all-together were crushing the box. Each didn't think it had +enough to garbage collect very often, so memory just kept going up. Calling the +above forces all processes to garbage collect, and thus throw away all those +little binary bits they were hoarding. + +## These aren't the solutions you are looking for + +The `erl` process has tons of command-line options which allow you to tweak all +kinds of knobs. We've had tons of performance problems with our application, as +of yet not a single one has been solved with turning one of these knobs. They've +all been design issues or just run-of-the-mill bugs. I'm not saying the knobs +are *never* useful, but I haven't seen it yet. + +## Erlang processes are great, except when they're not + +The erlang model of allowing processes to manage global state works really well +in many cases. Possibly even most cases. There are, however, times when it +becomes a performance problem. This became apparent in the project I was working +on for Grooveshark, which was, at its heart, a pubsub server. + +The architecture was very simple: each channel was managed by a process, client +connection processes subscribed to that channel and received publishes from it. +Easy right? The problem was that extremely high volume channels were simply not +able to keep up with the load. The channel process could do certain things very +fast, but there were some operations which simply took time and slowed +everything down. For example, channels could have arbitrary properties set on +them by their owners. Retrieving an arbitrary property from a channel was a +fairly fast operation: client `call`s the channel process, channel process +immediately responds with the property value. No blocking involved. 
+ +But as soon as there was any kind of call which required the channel process to +talk to yet *another* process (unfortunately necessary), things got hairy. On +high volume channels publishes/gets/set operations would get massively backed up +in the message queue while the process was blocked on another process. We tried +many things, but ultimately gave up on the process-per-channel approach. + +We instead decided on keeping *all* channel state in a transactional database. +When client processes "called" operations on a channel, they really are just +acting on the database data inline, no message passing involved. This means that +read-only operations are super-fast because there is minimal blocking, and if +some random other process is being slow it only affects the one client making +the call which is causing it to be slow, and not holding up a whole host of +other clients. + +## Mnesia might not be what you want + +This one is probably a bit controversial, and definitely subject to use-cases. +Do your own testing and profiling, find out what's right for you. + +Mnesia is erlang's solution for global state. It's an in-memory transactional +database which can scale to N nodes and persist to disk. It is hosted +directly in the erlang processes memory so you interact with it in erlang +directly in your code; no calling out to database drivers and such. Sounds great +right? + +Unfortunately mnesia is not a very full-featured database. It is essentially a +key-value store which can hold arbitrary erlang data-types, albeit in a set +schema which you lay out for it during startup. This means that more complex +types like sorted sets and hash maps (although this was addressed with the +introduction of the map data-type in R17) are difficult to work with within +mnesia. Additionally, erlang's data model of immutability, while awesome +usually, can bite you here because it's difficult (impossible?) 
to pull out +chunks of data within a record without accessing the whole record. + +For example, when retrieving the list of processes subscribed to a channel our +application doesn't simply pull the full list and iterate over it. This is too +slow, and in some cases the subscriber list was so large it wasn't actually +feasible. The channel process wasn't cleaning up its heap fast enough, so +multiple publishes would end up with multiple copies of the giant list in +memory. This became a problem. Instead we chain spawned processes, each of which +pulls a set chunk of the subscriber list, and iterates over that. This is very +difficult to implement in mnesia without pulling the full subscriber list into +the process' memory at some point in the process. + +It is, however, fairly trivial to implement in redis using sorted sets. For this +case, and many other cases after, the motto for performance improvements became +"stick it in redis". The application is at the point where *all* state which +isn't directly tied to a specific connection is kept in redis, encoded using +`term_to_binary`. The performance hit of going to an outside process for data +was actually much less than we'd originally thought, and ended up being a plus +since we had much more freedom to do interesting hacks to speed up our +accesses. diff --git a/static/src/_posts/2015-03-11-rabbit-hole.md b/static/src/_posts/2015-03-11-rabbit-hole.md new file mode 100644 index 0000000..0bea0b5 --- /dev/null +++ b/static/src/_posts/2015-03-11-rabbit-hole.md @@ -0,0 +1,166 @@ +--- +title: Rabbit Hole +description: >- + Complex systems sometimes require complex debugging. +tags: tech +--- + +We've begun rolling out [SkyDNS][skydns] at my job, which has been pretty neat. +We're basing a couple future projects around being able to use it, and it's made +dynamic configuration and service discovery nice and easy.
+ +This post chronicles catching a bug because of our switch to SkyDNS, and how we +discover its root cause. I like to call these kinds of bugs "rabbit holes"; they +look shallow at first, but anytime you make a little progress forward a little +more is always required, until you discover the ending somewhere totally +unrelated to the start. + +## The Bug + +We are seeing *tons* of these in the SkyDNS log: + +``` +[skydns] Feb 20 17:21:15.168 INFO | no nameservers defined or name too short, can not forward +``` + +I fire up tcpdump to see if I can see anything interesting, and sure enough run +across a bunch of these: + +``` +# tcpdump -vvv -s 0 -l -n port 53 +tcpdump: listening on eth0, link-type EN10MB (Ethernet), capture size 65535 bytes + ... + $fen_ip.50257 > $skydns_ip.domain: [udp sum ok] 16218+ A? unknown. (25) + $fen_ip.27372 > $skydns_ip.domain: [udp sum ok] 16218+ A? unknown. (25) + $fen_ip.35634 > $skydns_ip.domain: [udp sum ok] 59227+ A? unknown. (25) + $fen_ip.64363 > $skydns_ip.domain: [udp sum ok] 59227+ A? unknown. (25) +``` + +It appears that some of our front end nodes (FENs) are making tons of DNS +requests trying to find the A record of `unknown`. Something on our FENs is +doing something insane and is breaking. + +## The FENs + +Hopping over to my favorite FEN we're able to see the packets in question +leaving on a tcpdump as well, but that's not helpful for finding the root cause. +We have lots of processes running on the FENs and any number of them could be +doing something crazy. + +We fire up sysdig, which is similar to systemtap and strace in that it allows +you to hook into the kernel and view various kernel activities in real time, but +it's easier to use than both. The following command dumps all UDP packets being +sent and what process is sending them: + +``` +# sysdig fd.l4proto=udp +...
+2528950 22:17:35.260606188 0 php-fpm (21477) < connect res=0 tuple=$fen_ip:61173->$skydns_ip:53 +2528961 22:17:35.260611327 0 php-fpm (21477) > sendto fd=102(<4u>$fen_ip:61173->$skydns_ip:53) size=25 tuple=NULL +2528991 22:17:35.260631917 0 php-fpm (21477) < sendto res=25 data=.r...........unknown..... +2530470 22:17:35.261879032 0 php-fpm (21477) > ioctl fd=102(<4u>$fen_ip:61173->$skydns_ip:53) request=541B argument=7FFF82DC8728 +2530472 22:17:35.261880574 0 php-fpm (21477) < ioctl res=0 +2530474 22:17:35.261881226 0 php-fpm (21477) > recvfrom fd=102(<4u>$fen_ip:61173->$skydns_ip:53) size=1024 +2530476 22:17:35.261883424 0 php-fpm (21477) < recvfrom res=25 data=.r...........unknown..... tuple=$skydns_ip:53->$fen_ip:61173 +2530485 22:17:35.261888997 0 php-fpm (21477) > close fd=102(<4u>$fen_ip:61173->$skydns_ip:53) +2530488 22:17:35.261892626 0 php-fpm (21477) < close res=0 +``` + +Aha! We can see php-fpm is requesting something over udp with the string +`unknown` in it. We've now narrowed down the guilty process, the rest should be +easy right? + +## Which PHP? + +Unfortunately we're a PHP shop; knowing that php-fpm is doing something on a FEN +narrows down the guilty codebase little. Taking the FEN out of our load-balancer +stops the requests for `unknown`, so we *can* say that it's some user-facing +code that is the culprit. Our setup on the FENs involves users hitting nginx +for static content and nginx proxying PHP requests back to php-fpm. Since all +our virtual domains are defined in nginx, we are able to do something horrible. + +On the particular FEN we're on we make a guess about which virtual domain the +problem is likely coming from (our main app), and proxy all traffic from all +other domains to a different FEN. We still see requests for `unknown` leaving +the box, so we've narrowed the problem down a little more. 
+ +## The Despair + +Nothing in our code is doing any direct DNS calls as far as we can find, and we +don't see any places PHP might be doing it for us. We have lots of PHP +extensions in place, all written in C and all black boxes; any of them could be +the culprit. Grepping through the likely candidates' source code for the string +`unknown` proves fruitless. + +We try xdebug at this point. xdebug is a profiler for php which will create +cachegrind files for the running code. With cachegrind you can see every +function which was ever called, how long was spent within each function, a full +call-graph, and lots more. Unfortunately xdebug outputs cachegrind files on a +per-php-fpm-process basis, and overwrites the previous file on each new request. +So xdebug is pretty much useless, since what is in the cachegrind file isn't +necessarily what spawned the DNS request. + +## Gotcha (sorta) + +We turn back to the tried and true method of dumping all the traffic using +tcpdump and perusing through that manually. + +What we find is that nearly every time there is a DNS request for `unknown`, if +we scroll up a bit there is (usually) a particular request to memcache. The +requested key is always in the style of `function-name:someid:otherstuff`. When +looking in the code around that function name we find this ominous looking call: + +```php +$ipAddress = getIPAddress(); +$geoipInfo = getCountryInfoFromIP($ipAddress); +``` + +This points us in the right direction. On a hunch we add some debug +logging to print out the `$ipAddress` variable, and sure enough it comes back as +`unknown`. AHA! + +So what we surmise is happening is that for some reason our geoip extension, +which we use to get the location data of an IP address and which +`getCountryInfoFromIP` calls, is seeing something which is *not* an IP address +and trying to resolve it. + +## Gotcha (for real) + +So the question becomes: why are we getting the string `unknown` as an IP +address?
+ +Adding some debug logging around the area we found before showed that +`$_SERVER['REMOTE_ADDR']`, which is the variable populated with the IP address +of the client, is sometimes `unknown`. We guess that this has something to do +with some magic we are doing on nginx's side to populate `REMOTE_ADDR` with the +real IP address of the client in the case of them going through a proxy. + +Many proxies send along the header `X-Forwarded-For` to indicate the real IP of +the client they're proxying for, otherwise the server would only see the proxy's +IP. In our setup I decided that in those cases we should set the `REMOTE_ADDR` +to the real client IP so our application logic doesn't even have to worry about +it. There are a couple problems with this which render it a bad decision, one +being that if some misbehaving proxy was to, say, start sending +`X-Forwarded-For: unknown` then some written applications might mistake that to +mean the client's IP is `unknown`. + +## The Fix + +The fix here was two-fold: + +1) We now always set `$_SERVER['REMOTE_ADDR']` to be the remote address of the +requests, regardless of if it's a proxy, and also send the application the +`X-Forwarded-For` header to do with as it pleases. + +2) Inside our app we look at all the headers sent and do some processing to +decide what the actual client IP is. PHP can handle a lot more complex logic +than nginx can, so we can do things like check to make sure the IP is an IP, and +also that it's not some NAT'd internal ip, and so forth. + +And that's it. From some weird log messages on our DNS servers to an nginx +mis-configuration on an almost unrelated set of servers, this is one of those +strange bugs that never has a nice solution and goes unsolved for a long time. +Spending the time to dive down the rabbit hole and find the answer is often +tedious, but also often very rewarding.
+ +[skydns]: https://github.com/skynetservices/skydns diff --git a/static/src/_posts/2015-07-15-go-http.md b/static/src/_posts/2015-07-15-go-http.md new file mode 100644 index 0000000..7da7d6b --- /dev/null +++ b/static/src/_posts/2015-07-15-go-http.md @@ -0,0 +1,547 @@ +--- +title: Go's http package by example +description: >- + The basics of using, testing, and composing apps built using go's net/http + package. +--- + +Go's [http](http://golang.org/pkg/net/http/) package has turned into one of my +favorite things about the Go programming language. Initially it appears to be +somewhat complex, but in reality it can be broken down into a couple of simple +components that are extremely flexible in how they can be used. This guide will +cover the basic ideas behind the http package, as well as examples in using, +testing, and composing apps built with it. + +This guide assumes you have some basic knowledge of what an interface in Go is, +and some idea of how HTTP works and what it can do. + +## Handler + +The building block of the entire http package is the `http.Handler` interface, +which is defined as follows: + +```go +type Handler interface { + ServeHTTP(ResponseWriter, *Request) +} +``` + +Once implemented the `http.Handler` can be passed to `http.ListenAndServe`, +which will call the `ServeHTTP` method on every incoming request. + +`http.Request` contains all relevant information about an incoming http request +which is being served by your `http.Handler`. + +The `http.ResponseWriter` is the interface through which you can respond to the +request. It implements the `io.Writer` interface, so you can use methods like +`fmt.Fprintf` to write a formatted string as the response body, or ones like +`io.Copy` to write out the contents of a file (or any other `io.Reader`). The +response code can be set before you begin writing data using the `WriteHeader` +method. 
+ +Here's an example of an extremely simple http server: + +```go +package main + +import ( + "fmt" + "log" + "net/http" +) + +type helloHandler struct{} + +func (h helloHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + fmt.Fprintf(w, "hello, you've hit %s\n", r.URL.Path) +} + +func main() { + err := http.ListenAndServe(":9999", helloHandler{}) + log.Fatal(err) +} +``` + +`http.ListenAndServe` serves requests using the handler, listening on the given +address:port. It will block unless it encounters an error listening, in which +case we `log.Fatal`. + +Here's an example of using this handler with curl: + +``` + ~ $ curl localhost:9999/foo/bar + hello, you've hit /foo/bar +``` + + +## HandlerFunc + +Often defining a full type to implement the `http.Handler` interface is a bit +overkill, especially for extremely simple `ServeHTTP` functions like the one +above. The `http` package provides a helper function, `http.HandlerFunc`, which +wraps a function which has the signature +`func(w http.ResponseWriter, r *http.Request)`, returning an `http.Handler` +which will call it in all cases. + +The following behaves exactly like the previous example, but uses +`http.HandlerFunc` instead of defining a new type. + +```go +package main + +import ( + "fmt" + "log" + "net/http" +) + +func main() { + h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintf(w, "hello, you've hit %s\n", r.URL.Path) + }) + + err := http.ListenAndServe(":9999", h) + log.Fatal(err) +} +``` + +## ServeMux + +On their own, the previous examples don't seem all that useful. If we wanted to +have different behavior for different endpoints we would end up with having to +parse path strings as well as numerous `if` or `switch` statements. Luckily +we're provided with `http.ServeMux`, which does all of that for us. 
Here's an +example of it being used: + +```go +package main + +import ( + "fmt" + "log" + "net/http" +) + +func main() { + h := http.NewServeMux() + + h.HandleFunc("/foo", func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintln(w, "Hello, you hit foo!") + }) + + h.HandleFunc("/bar", func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintln(w, "Hello, you hit bar!") + }) + + h.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(404) + fmt.Fprintln(w, "You're lost, go home") + }) + + err := http.ListenAndServe(":9999", h) + log.Fatal(err) +} +``` + +The `http.ServeMux` is itself an `http.Handler`, so it can be passed into +`http.ListenAndServe`. When it receives a request it will check if the request's +path is prefixed by any of its known paths, choosing the longest prefix match it +can find. We use the `/` endpoint as a catch-all to catch any requests to +unknown endpoints. Here's some examples of it being used: + +``` + ~ $ curl localhost:9999/foo +Hello, you hit foo! + + ~ $ curl localhost:9999/bar +Hello, you hit bar! + + ~ $ curl localhost:9999/baz +You're lost, go home +``` + +`http.ServeMux` has both `Handle` and `HandleFunc` methods. These do the same +thing, except that `Handle` takes in an `http.Handler` while `HandleFunc` merely +takes in a function, implicitly wrapping it just as `http.HandlerFunc` does. + +### Other muxes + +There are numerous replacements for `http.ServeMux` like +[gorilla/mux](http://www.gorillatoolkit.org/pkg/mux) which give you things like +automatically pulling variables out of paths, easily asserting what http methods +are allowed on an endpoint, and more. Most of these replacements will implement +`http.Handler` like `http.ServeMux` does, and accept `http.Handler`s as +arguments, and so are easy to use in conjunction with the rest of the things +I'm going to talk about in this post. 
+ +## Composability + +When I say that the `http` package is composable I mean that it is very easy to +create re-usable pieces of code and glue them together into a new working +application. The `http.Handler` interface is the way all pieces communicate with +each other. Here's an example of where we use the same `http.Handler` to handle +multiple endpoints, each slightly differently: + +```go +package main + +import ( + "fmt" + "log" + "net/http" +) + +type numberDumper int + +func (n numberDumper) ServeHTTP(w http.ResponseWriter, r *http.Request) { + fmt.Fprintf(w, "Here's your number: %d\n", n) +} + +func main() { + h := http.NewServeMux() + + h.Handle("/one", numberDumper(1)) + h.Handle("/two", numberDumper(2)) + h.Handle("/three", numberDumper(3)) + h.Handle("/four", numberDumper(4)) + h.Handle("/five", numberDumper(5)) + + h.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(404) + fmt.Fprintln(w, "That's not a supported number!") + }) + + err := http.ListenAndServe(":9999", h) + log.Fatal(err) +} +``` + +`numberDumper` implements `http.Handler`, and can be passed into the +`http.ServeMux` multiple times to serve multiple endpoints. Here's it in action: + +``` + ~ $ curl localhost:9999/one +Here's your number: 1 + ~ $ curl localhost:9999/five +Here's your number: 5 + ~ $ curl localhost:9999/bazillion +That's not a supported number! +``` + +## Testing + +Testing http endpoints is extremely easy in Go, and doesn't even require you to +actually listen on any ports! The `httptest` package provides a few handy +utilities, including `NewRecorder` which implements `http.ResponseWriter` and +allows you to effectively make an http request by calling `ServeHTTP` directly. +Here's an example of a test for our previously implemented `numberDumper`, +commented with what exactly is happening: + +```go +package main + +import ( + "fmt" + "net/http" + "net/http/httptest" + . 
"testing" +) + +func TestNumberDumper(t *T) { + // We first create the http.Handler we wish to test + n := numberDumper(1) + + // We create an http.Request object to test with. The http.Request is + // totally customizable in every way that a real-life http request is, so + // even the most intricate behavior can be tested + r, _ := http.NewRequest("GET", "/one", nil) + + // httptest.Recorder implements the http.ResponseWriter interface, and as + // such can be passed into ServeHTTP to receive the response. It will act as + // if all data being given to it is being sent to a real client, when in + // reality it's being buffered for later observation + w := httptest.NewRecorder() + + // Pass in our httptest.Recorder and http.Request to our numberDumper. At + // this point the numberDumper will act just as if it was responding to a + // real request + n.ServeHTTP(w, r) + + // httptest.Recorder gives a number of fields and methods which can be used + // to observe the response made to our request. Here we check the response + // code + if w.Code != 200 { + t.Fatalf("wrong code returned: %d", w.Code) + } + + // We can also get the full body out of the httptest.Recorder, and check + // that its contents are what we expect + body := w.Body.String() + if body != fmt.Sprintf("Here's your number: 1\n") { + t.Fatalf("wrong body returned: %s", body) + } + +} +``` + +In this way it's easy to create tests for your individual components that you +are using to build your application, keeping the tests near to the functionality +they're testing. + +Note: if you ever do need to spin up a test server in your tests, `httptest` +also provides a way to create a server listening on a random open port for use +in tests as well. + +## Middleware + +Serving endpoints is nice, but often there's functionality you need to run for +*every* request before the actual endpoint's handler is run. For example, access +logging. 
A middleware component is one which implements `http.Handler`, but will +actually pass the request off to another `http.Handler` after doing some set of +actions. The `http.ServeMux` we looked at earlier is actually an example of +middleware, since it passes the request off to another `http.Handler` for actual +processing. Here's an example of our previous example with some logging +middleware: + +```go +package main + +import ( + "fmt" + "log" + "net/http" +) + +type numberDumper int + +func (n numberDumper) ServeHTTP(w http.ResponseWriter, r *http.Request) { + fmt.Fprintf(w, "Here's your number: %d\n", n) +} + +func logger(h http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + log.Printf("%s requested %s", r.RemoteAddr, r.URL) + h.ServeHTTP(w, r) + }) +} + +func main() { + h := http.NewServeMux() + + h.Handle("/one", numberDumper(1)) + h.Handle("/two", numberDumper(2)) + h.Handle("/three", numberDumper(3)) + h.Handle("/four", numberDumper(4)) + h.Handle("/five", numberDumper(5)) + + h.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(404) + fmt.Fprintln(w, "That's not a supported number!") + }) + + hl := logger(h) + + err := http.ListenAndServe(":9999", hl) + log.Fatal(err) +} +``` + +`logger` is a function which takes in an `http.Handler` called `h`, and returns +a new `http.Handler` which, when called, will log the request it was called with +and then pass off its arguments to `h`. To use it we pass in our +`http.ServeMux`, so all incoming requests will first be handled by the logging +middleware before being passed to the `http.ServeMux`. + +Here's an example log entry which is output when the `/five` endpoint is hit: + +``` +2015/06/30 20:15:41 [::1]:34688 requested /five +``` + +## Middleware chaining + +Being able to chain middleware together is an incredibly useful ability which we +get almost for free, as long as we use the signature +`func(http.Handler) http.Handler`. 
A middleware component returns the same type +which is passed into it, so simply passing the output of one middleware +component into the other is sufficient. + +However, more complex behavior with middleware can be tricky. For instance, what +if you want a piece of middleware which takes in a parameter upon creation? +Here's an example of just that, with a piece of middleware which will set a +header and its value for all requests: + +```go +package main + +import ( + "fmt" + "log" + "net/http" +) + +type numberDumper int + +func (n numberDumper) ServeHTTP(w http.ResponseWriter, r *http.Request) { + fmt.Fprintf(w, "Here's your number: %d\n", n) +} + +func logger(h http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + log.Printf("%s requested %s", r.RemoteAddr, r.URL) + h.ServeHTTP(w, r) + }) +} + +type headerSetter struct { + key, val string + handler http.Handler +} + +func (hs headerSetter) ServeHTTP(w http.ResponseWriter, r *http.Request) { + w.Header().Set(hs.key, hs.val) + hs.handler.ServeHTTP(w, r) +} + +func newHeaderSetter(key, val string) func(http.Handler) http.Handler { + return func(h http.Handler) http.Handler { + return headerSetter{key, val, h} + } +} + +func main() { + h := http.NewServeMux() + + h.Handle("/one", numberDumper(1)) + h.Handle("/two", numberDumper(2)) + h.Handle("/three", numberDumper(3)) + h.Handle("/four", numberDumper(4)) + h.Handle("/five", numberDumper(5)) + + h.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(404) + fmt.Fprintln(w, "That's not a supported number!") + }) + + hl := logger(h) + hhs := newHeaderSetter("X-FOO", "BAR")(hl) + + err := http.ListenAndServe(":9999", hhs) + log.Fatal(err) +} +``` + +And here's the curl output: + +``` + ~ $ curl -i localhost:9999/three + HTTP/1.1 200 OK + X-Foo: BAR + Date: Wed, 01 Jul 2015 00:39:48 GMT + Content-Length: 22 + Content-Type: text/plain; charset=utf-8 + + Here's your number: 3 + +``` + 
+`newHeaderSetter` returns a function which accepts and returns an +`http.Handler`. Calling that returned function with an `http.Handler` then gets +you an `http.Handler` which will set the header given to `newHeaderSetter` +before continuing on to the given `http.Handler`. + +This may seem like a strange way of organizing this; for this example the +signature for `newHeaderSetter` could very well have looked like this: + +``` +func newHeaderSetter(key, val string, h http.Handler) http.Handler +``` + +And that implementation would have worked fine. But it would have been more +difficult to compose going forward. In the next section I'll show what I mean. + +## Composing middleware with alice + +[Alice](https://github.com/justinas/alice) is a very simple and convenient +helper for working with middleware using the function signature we've been using +thus far. Alice is used to create and use chains of middleware. Chains can even +be appended to each other, giving even further flexibility. Here's our previous +example with a couple more headers being set, but also using alice to manage the +added complexity.
+ +```go +package main + +import ( + "fmt" + "log" + "net/http" + + "github.com/justinas/alice" +) + +type numberDumper int + +func (n numberDumper) ServeHTTP(w http.ResponseWriter, r *http.Request) { + fmt.Fprintf(w, "Here's your number: %d\n", n) +} + +func logger(h http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + log.Printf("%s requested %s", r.RemoteAddr, r.URL) + h.ServeHTTP(w, r) + }) +} + +type headerSetter struct { + key, val string + handler http.Handler +} + +func (hs headerSetter) ServeHTTP(w http.ResponseWriter, r *http.Request) { + w.Header().Set(hs.key, hs.val) + hs.handler.ServeHTTP(w, r) +} + +func newHeaderSetter(key, val string) func(http.Handler) http.Handler { + return func(h http.Handler) http.Handler { + return headerSetter{key, val, h} + } +} + +func main() { + h := http.NewServeMux() + + h.Handle("/one", numberDumper(1)) + h.Handle("/two", numberDumper(2)) + h.Handle("/three", numberDumper(3)) + h.Handle("/four", numberDumper(4)) + + fiveHS := newHeaderSetter("X-FIVE", "the best number") + h.Handle("/five", fiveHS(numberDumper(5))) + + h.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(404) + fmt.Fprintln(w, "That's not a supported number!") + }) + + chain := alice.New( + newHeaderSetter("X-FOO", "BAR"), + newHeaderSetter("X-BAZ", "BUZ"), + logger, + ).Then(h) + + err := http.ListenAndServe(":9999", chain) + log.Fatal(err) +} +``` + +In this example all requests will have the headers `X-FOO` and `X-BAZ` set, but +the `/five` endpoint will *also* have the `X-FIVE` header set. + +## Fin + +Starting with a simple idea of an interface, the `http` package allows us to +create for ourselves an incredibly useful and flexible (yet still rather simple) +ecosystem for building web apps with re-usable components, all without breaking +our static checks. 
diff --git a/static/src/_posts/2015-11-21-happy-trees.md b/static/src/_posts/2015-11-21-happy-trees.md new file mode 100644 index 0000000..7fe8d0a --- /dev/null +++ b/static/src/_posts/2015-11-21-happy-trees.md @@ -0,0 +1,236 @@ +--- +title: Happy Trees +description: >- + Visualizing a forest of happy trees. +tags: tech art +--- + +Source code related to this post is available [here](https://github.com/mediocregopher/happy-tree). + +This project was inspired by [this video](https://www.youtube.com/watch?v=_DpzAvb3Vk4), +which you should watch first in order to really understand what's going on. + +My inspiration came from his noting that happification could be done on numbers +in bases other than 10. I immediately thought of hexadecimal, base-16, since I'm +a programmer and that's what I think of. I also was trying to think of how one +would graphically represent a large happification tree, when I realized that +hexadecimal numbers are colors, and colors graphically represent things nicely! + +## Colors + +Colors to computers are represented using 3-bytes, encompassing red, green, and +blue. Each byte is represented by two hexadecimal digits, and they are appended +together. For example `FF0000` represents maximum red (`FF`) added to no green +and no blue. `FF5500` represents maximum red (`FF`), some green (`55`) and no +blue (`00`), which when added together results in kind of an orange color. + +## Happifying colors + +In base 10, happifying a number is done by splitting its digits, squaring each +one individually, and adding the resulting numbers. The principle works the same +for hexadecimal numbers: + +``` +A4F +A*A + 4*4 + F*F +64 + 10 + E1 +155 // 341 in decimal +``` + +So if all colors are 6-digit hexadecimal numbers, they can be happified easily! + +``` +FF5500 +F*F + F*F + 5*5 + 5*5 + 0*0 + 0*0 +E1 + E1 + 19 + 19 + 0 + 0 +0001F4 +``` + +So `FF5500` (an orangish color) happifies to `0001F4` (a darker blue).
Since +order of digits doesn't matter, `5F50F0` also happifies to `0001F4`. From this +fact, we can make a tree (hence the happification tree). I can do this process +on every color from `000000` (black) to `FFFFFF` (white), so I will! + +## Representing the tree + +So I know I can represent the tree using color, but there's more to decide on +than that. The easy way to represent a tree would be to simply draw a literal +tree graph, with a circle for each color and lines pointing to its parent and +children. But this is boring, and also if I want to represent *all* colors the +resulting image would be enormous and/or unreadable. + +I decided on using a hollow, multi-level pie-chart. Using the example +of `000002`, it would look something like this: + +![An example of a partial multi-level pie chart](/img/happy-tree/partial.png) + +The inner arc represents the color `000002`. The second arc represents the 15 +different colors which happify into `000002`, each of them may also have their +own outer arc of numbers which happify to them, and so on. + +This representation is nice because a) It looks cool and b) it allows the +melancoils of the hexadecimals to be placed around the happification tree +(numbers which happify into `000001`), which is convenient. It's also somewhat +easier to code than a circle/branch based tree diagram. + +An important feature I had to implement was proportional slice sizes. If I were +to give each child of a color an equal size on that arc's edge the image would +simply not work. Some branches of the tree are extremely deep, while others are +very shallow. If all were given the same space, those deep branches wouldn't +even be representable by a single pixel's width, and would simply fail to show +up. So I implemented proportional slice sizes, where the size of every slice is +determined to be proportional to how many total (recursively) children it has. 
+You can see this in the above example, where the second level arc is largely +comprised of one giant slice, with many smaller slices taking up the end. + +## First attempt + +My first attempt resulted in this image (click for 5000x5000 version): + +[![Result of first attempt](/img/happy-tree/happy-tree-atmp1-small.png)](/img/happy-tree/happy-tree-atmp1.png) + +The first thing you'll notice is that it looks pretty neat. + +The second thing you'll notice is that there's actually only one melancoil in +the 6-digit hexadecimal number set. The innermost black circle is `000000` which +only happifies to itself, and nothing else will happify to it (sad `000000`). +The second circle represents `000001`, and all of its runty children. And +finally the melancoil, comprised of: + +``` +00000D -> 0000A9 -> 0000B5 -> 000092 -> 000055 -> 000032 -> ... +``` + +The final thing you'll notice (or maybe it was the first, since it's really +obvious) is that it's very blue. Non-blue colors are really only represented as +leaves on their trees and don't ever really have any children of their own, so +the blue and black sections take up vastly more space. + +This makes sense. The number which should generate the largest happification +result, `FFFFFF`, only results in `000546`, which is primarily blue. So in effect +all colors happify to some shade of blue. + +This might have been it, technically this is the happification tree and the +melancoil of 6 digit hexadecimal numbers represented as colors. But it's also +boring, and I wanted to do better. + +## Second attempt + +The root of the problem is that the definition of "happification" I used +resulted in not diverse enough results. I wanted something which would give me +numbers where any of the digits could be anything. Something more random. + +I considered using a hash instead, like md5, but that has its own problems.
+There's no guarantee that any number would actually reach `000001`, which isn't +required but it's a nice feature that I wanted. It also would be unlikely that +there would be any melancoils that weren't absolutely gigantic. + +I ended up redefining what it meant to happify a hexadecimal number. Instead of +adding all the digits up, I first split up the red, green, and blue digits into +their own numbers, happified those numbers, and finally reassembled the results +back into a single number. For example: + +``` +FF5500 +FF, 55, 00 +F*F + F*F, 5*5 + 5*5, 0*0 + 0*0 +1C2, 32, 00 +C23200 +``` + +I drop that 1 on the `1C2`, because it has no place in this system. Sorry 1. + +Simply replacing that function resulted in this image (click for 5000x5000 version): + +[![Result of second attempt](/img/happy-tree/happy-tree-atmp2-small.png)](/img/happy-tree/happy-tree-atmp2.png) + +The first thing you notice is that it's so colorful! So that goal was achieved. + +The second thing you notice is that there's *significantly* more melancoils. +Hundreds, even. Here's a couple of the melancoils (each on its own line): + +``` +00000D -> 0000A9 -> 0000B5 -> 000092 -> 000055 -> 000032 -> ... +000D0D -> 00A9A9 -> 00B5B5 -> 009292 -> 005555 -> 003232 -> ... +0D0D0D -> A9A9A9 -> B5B5B5 -> 929292 -> 555555 -> 323232 -> ... +0D0D32 -> A9A90D -> B5B5A9 -> 9292B5 -> 555592 -> 323255 -> ... +... +``` + +And so on. You'll notice the first melancoil listed is the same as the one from +the first attempt. You'll also notice that the same numbers from that +melancoil are "re-used" in the rest of them as well. The second coil listed is +the same as the first, just with the numbers repeated in the 3rd and 4th digits. +The third coil has those numbers repeated once more in the 1st and 2nd digits. +The final coil is the same numbers, but with the 5th and 6th digits offset one +place in the rotation.
+ +The rest of the melancoils in this attempt work out to just be every conceivable +iteration of the above. This is simply a property of the algorithm chosen, and +there's not a whole lot we can do about it. + +## Third attempt + +After talking with [Mr. Marco](/members/#marcopolo) about the previous attempts +I got an idea that would lead me towards more attempts. The main issue I was +having in coming up with new happification algorithms was figuring out what to +do about getting a number greater than `FFFFFF`. Dropping the leading digits +just seemed.... lame. + +One solution I came up with was to simply happify again. And again, and again. +Until I got a number less than or equal to `FFFFFF`. + +With this new plan, I could increase the power by which I'm raising each +individual digit, and drop the strategy from the second attempt of splitting the +number into three parts. In the first attempt I was doing happification to the +power of 2, but what if I wanted to happify to the power of 6? It would look +something like this (starting with the number `34BEEF`): + +``` +34BEEF +3^6 + 4^6 + B^6 + E^6 + E^6 + F^6 +2D9 + 1000 + 1B0829 + 72E440 + 72E440 + ADCEA1 +1AEB223 + +1AEB223 is greater than FFFFFF, so we happify again + +1^6 + A^6 + E^6 + B^6 + 2^6 + 2^6 + 3^6 +1 + F4240 + 72E440 + 1B0829 + 40 + 40 + 2D9 +9D3203 +``` + +So `34BEEF` happifies to `9D3203`, when happifying to the power of 6.
+ +As mentioned before the first attempt in this blog was the 2nd power tree, +here's the trees for the 3rd, 4th, 5th, and 6th powers (each image is a link to +a larger version): + +3rd power: +[![Third attempt, 3rd power](/img/happy-tree/happy-tree-atmp3-pow3-small.png)](/img/happy-tree/happy-tree-atmp3-pow3.png) + +4th power: +[![Third attempt, 4th power](/img/happy-tree/happy-tree-atmp3-pow4-small.png)](/img/happy-tree/happy-tree-atmp3-pow4.png) + +5th power: +[![Third attempt, 5th power](/img/happy-tree/happy-tree-atmp3-pow5-small.png)](/img/happy-tree/happy-tree-atmp3-pow5.png) + +6th power: +[![Third attempt, 6th power](/img/happy-tree/happy-tree-atmp3-pow6-small.png)](/img/happy-tree/happy-tree-atmp3-pow6.png) + +A couple things to note: + +* 3-5 are still very blue. It's not till the 6th power that the distribution + becomes random enough to become very colorful. + +* Some powers have more coils than others. Power of 3 has a lot, and actually a + lot of them aren't coils, but single narcissistic numbers. Narcissistic + numbers are those which happify to themselves. `000000` and `000001` are + narcissistic numbers in all powers, power of 3 has quite a few more. + +* 4 looks super cool. + +Using unsigned 64-bit integers I could theoretically go up to the power of 15. +But I hit a roadblock at power of 7, in that there's actually a melancoil which +occurs whose members are all greater than `FFFFFF`. This means that my strategy +of repeating happifying until I get under `FFFFFF` doesn't work for any numbers +which lead into that coil. diff --git a/static/src/_posts/2017-09-06-brian-bars.md b/static/src/_posts/2017-09-06-brian-bars.md new file mode 100644 index 0000000..2c56272 --- /dev/null +++ b/static/src/_posts/2017-09-06-brian-bars.md @@ -0,0 +1,105 @@ +--- +title: Brian Bars +description: >- + Cheap and easy to make, healthy, vegan, high-carb, high-protein. "The Good + Stuff". 
+updated: 2018-01-18 +--- + +It actually blows my mind it's been 4 years since I used this blog. It was +previously a tech blog, but then I started putting all my tech-related posts on +[the cryptic blog](https://cryptic.io). As of now this is a lifestyle/travel +blog. The me of 4 years ago would be horrified. + +Now I just have to come up with a lifestyle and do some traveling. + +## Recipe + +This isn't a real recipe because I'm not going to preface it with my entire +fucking life story. Let's talk about the food. + +Brian bars: + +* Are like Clif Bars, but with the simplicity of ingredients that Larabars have. +* Are easy to make, only needing a food processor (I use a magic bullet) and a + stovetop oven. +* Keep for a long time and don't really need refrigerating (but don't mind it + neither) +* Are paleo, vegan, gluten-free, free-range, grass-fed, whatever... +* Are really really filling. +* Are named after me, deal with it. + +I've worked on this recipe for a bit, trying to make it workable, and will +probably keep adjusting it (and this post) as time goes on. + +### Ingredients + +Nuts and seeds. Most of this recipe is nuts and seeds. Here's the ones I used: + +* 1 cup almonds +* 1 cup peanuts +* 1 cup walnuts +* 1 cup coconut flakes/shavings/whatever +* 1/2 cup flax seeds +* 1/2 cup sesame seeds + +For all of those above it doesn't _really_ matter what nuts/seeds you use, it's +all gonna get ground up anyway. So whatever's cheap works fine. Also, avoid +salt-added ones if you can. + +The other ingredients are: + +* 1 cup raisins/currants +* 1.5 lbs of pitted dates (no added sugar! you don't need it!) +* 2 cups oats + +### Grind up the nuts + +Throw the nuts into the food processor and grind them into a powder. Then throw +that powder into a bowl along with the seeds, coconuts, raisins, and oats, and +mix em good. + +I don't _completely_ grind up the nuts, instead leaving some chunks in it here +and there, but you do you. 
+ +### Prepare the dates + +This is the harder part, and is what took me a couple tries to get right. The +best strategy I've found is to steam the dates a bit over a stove to soften +them. Then, about a cup at a time, you can throw them in the food processor and +turn them into a paste. You may have to add a little water if your processor is +having trouble. + +Once processed you can add the dates to the mix from before and stir it all up. +It'll end up looking something like cookie dough. Except unlike cookie dough +it's completely safe to eat and maybe sorta healthy. + +### Bake it, Finish it + +Put the dough stuff in a pan of some sort, flatten it out, and stick it in the +oven at like 250 or 300 for a few hours. You're trying to cook out the water you +added earlier when you steamed the dates, as well as whatever little moisture +the dates had in the first place. + +Once thoroughly baked you can stick the pan in the fridge to cool and keep, +and/or cut it up into individual bars. Keep in mind that the bars are super +filling and allow for pretty small portions. Wrap em in foil or plastic wrap and +take them to-go, or keep them around for a snack. Or both. Or whatever you want +to do, it's your food. + +### Cleanup + +Dates are simultaneously magical and the most annoying thing to work with, so +there's cleanup problems you may run into with them: + +Protip #1: When cleaning your processed date slime off of your cooking utensils +I'd recommend just letting them soak in water for a while. Dry-ish date slime +will stick to everything, while soaked date slime will come right off. + +Protip #2: Apparently if you want ants, dates are a great way to get ants. My +apartment has never had an ant problem until 3 hours after I made a batch of +these and didn't wipe down my counter enough. I'm still dealing with the ants. +Apparently there's environmentally friendly ant poisons where the ants happily +carry the poison back into the nest and the whole nest eats it and dies. 
Which +feels kinda mean in some way, but is also pretty clever and they're just ants +anyway so fuck it. diff --git a/static/src/_posts/2018-10-25-rethinking-identity.md b/static/src/_posts/2018-10-25-rethinking-identity.md new file mode 100644 index 0000000..7fd7e70 --- /dev/null +++ b/static/src/_posts/2018-10-25-rethinking-identity.md @@ -0,0 +1,293 @@ +--- +title: Rethinking Identity +description: >- + A more useful way of thinking about identity on the internet, and using that + to build a service which makes our online life better. +tags: tech +--- + +In my view, the major social media platforms (Facebook, Twitter, Instagram, +etc...) are broken. They worked well at small scales, but billions of people are +now exposed to them, and [Murphy's Law][murphy] has come into effect. The weak +points in the platforms have been found and exploited, to the point where +they're barely usable for interacting with anyone you don't already know in +person. + +[murphy]: https://en.wikipedia.org/wiki/Murphy%27s_law + +On the other hand, social media, at its core, is a powerful tool that humans +have developed, and it's not one to be thrown away lightly (if it can be thrown +away at all). It's worthwhile to try and fix it. So that's what this post is +about. + +A lot of moaning and groaning has already been done on how social media is toxic +for the average person. But the average person isn't doing anything more than +receiving and reacting to their environment. If that environment is toxic, the +person in it becomes so as well. It's certainly possible to filter the toxicity +out, and use a platform to your own benefit, but that takes work on the user's +part. It would be nice to think that people will do more than follow the path of +least resistance, but at scale that's simply not how reality is, and people +shouldn't be expected to do that work. + +To identify what has become toxic about the platforms, first we need to identify +what a non-toxic platform would look like. 
+ +The ideal definition for social media is to give people a place to socialize +with friends, family, and the rest of the world. Defining "socialize" is tricky, +and probably an exercise only a socially awkward person who doesn't do enough +socializing would undertake. "Expressing one's feelings, knowledge, and +experiences to other people, and receiving theirs in turn" feels like a good +approximation. A platform where true socializing was the only activity would be +ideal. + +Here are some trends on our social media which have nothing to do with +socializing: artificially boosted follower numbers on Instagram to obtain +product sponsors, shills in Reddit comments boosting a product or company, +Russian trolls on Twitter spreading propaganda, trolls everywhere being dicks +and switching IPs when they get banned, and [that basketball president whose +wife used burner Twitter accounts to trash talk players][president]. + +[president]: https://www.nytimes.com/2018/06/07/sports/bryan-colangelo-sixers-wife.html + +These are all examples of how anonymity can be abused on social media. I want +to say up front that I'm _not_ against anonymity on the internet, and that I +think we can have our cake and eat it too. But we _should_ acknowledge the +direct and indirect problems anonymity causes. We can't trust that anyone on +social media is being honest about who they are and what their motivation is. +This problem extends outside of social media too, to Amazon product reviews (and +basically any other review system), online polls and raffles, multiplayer games, +and surely many other cases. + +## Identity + +To fix social media, and other large swaths of the internet, we need to rethink +identity. This process started for me a long time ago, when I watched [this TED +talk][identity], which discusses ways in which we misunderstand identity. +Crucially, David Birch points out that identity is not a name, it's more +fundamental than that. 
+ +[identity]: https://www.ted.com/talks/david_birch_identity_without_a_name + +In the context of online platforms, where a user creates an account which +identifies them in some way, identity breaks down into 3 distinct problems +which are often conflated: + +* Authentication: Is this identity owned by this person? +* Differentiation: Is this identity unique to this person? +* Authorization: Is this identity allowed to do X? + +For internet platform developers, authentication has been given the full focus. +Blog posts, articles, guides, and services abound which deal with properly +hashing and checking passwords, two factor authentication, proper account +recovery procedure, etc... While authentication is not a 100% solved problem, +it's had the most work done on it, and the problems which this post deals with +are not affected by it. + +The problem which should instead be focused on is differentiation. + +## Differentiation + +I want to make very clear, once more, that I am _not_ in favor of de-anonymizing +the web, and doing so is not what I'm proposing. + +Differentiation is without a doubt the most difficult identity problem to solve. +It's not even clear that it's solvable offline. Take this situation: you are in +a room, and you are told that one person is going to walk in, then leave, then +another person will do the same. These two persons may or may not be the same +person. You're allowed to do anything you like to each person (with their +consent) in order to determine if they are the same person or not. + +For the vast, vast majority of cases you can simply look with your eyeballs and +see if they are different people. But this will not work 100% of the time. +Identical twins are an obvious example of two persons looking like one, but a +malicious actor with a disguise might be one person posing as two. Biometrics +like fingerprints, iris scanning, and DNA testing fail for many reasons (the +identical twin case being one). 
You could attempt to give the first a unique +marking on their skin, but who's to say they don't have a solvent, which can +clean that marking off, waiting right outside the door? + +The solutions and refutations can continue on pedantically for some time, but +the point is that there is likely not a 100% solution, and even the 90% +solutions require significant investment. Differentiation is a hard problem, +which most developers don't want to solve. Most are fine with surrogates like +checking that an email or phone number is unique to the platform, but these +aren't enough to stop a dedicated individual or organization. + +### Roll Your Own Differentiation + +If a platform wants to roll their own solution to the differentiation problem, a +proper solution, it might look something like this: + +* Submit an image of your passport, or other government issued ID. This would + have to be checked against the appropriate government agency to ensure the + ID is legitimate. + +* Submit an image of your face, alongside a written note containing a code given + by the platform. Software to detect manipulated images would need to be + employed, as well as reverse image searching to ensure the image isn't being + reused. + +* Once completed, all data needs to be hashed/fingerprinted and then destroyed, + so sensitive data isn't sitting around on servers, but can still be checked + against future users signing up for the platform. + +* A dedicated support team would be needed to handle edge-cases and mistakes. + +None of these is trivial, nor would I trust an up-and-coming platform which is +being bootstrapped out of a basement to implement any of them correctly. +Additionally, going through with this process would be a _giant_ point of +friction for a user creating a new account; they likely would go use a different +platform instead, which didn't have all this nonsense required. + +### Differentiation as a Service + +This is the crux of this post. 
+ +Instead of each platform rolling their own differentiation, what if there was a +service for it. Users would still have to go through the hassle described above, +but only once forever, and on a more trustable site. Then platforms, no matter +what stage of development they're at, could use that service to ensure that +their community of users is free from the problems of fake accounts and trolls. + +This is what the service would look like: + +* A user would have to, at some point, have gone through the steps above to + create an account on the differentiation-as-a-service (DaaS) platform. This + account would have the normal authentication mechanisms that most platforms + do (password, two-factor, etc...). + +* When creating an account on a new platform, the user would login to their DaaS + account (similar to the common "login with Google/Facebook/Twitter" buttons). + +* The DaaS then returns an opaque token, an effectively random string which + uniquely identifies that user, to the platform. The platform can then check in + its own user database for any other users using that token, and know if the + user already has an account. All of this happens without any identifying + information being passed to the platform. + +Similar to how many sites outsource to Cloudflare to handle DDoS protection, +which is better handled en masse by people familiar with the problem, the DaaS +allows for outsourcing the problem of differentiation. Users are more likely to +trust an established DaaS service than a random website they're signing up for. +And signing up for a DaaS is a one-time event, so if enough platforms are using +the DaaS it could become worthwhile for them to do so. + +Finally, since the DaaS also handles authentication, a platform could outsource +that aspect of identity management to it as well. This is optional for the +platform, but for smaller platforms which are just starting up it might be +worthwhile to save that development time. 
+ +### Traits of a Successful DaaS + +It's possible for me to imagine a world where use of DaaS' is common, but +bridging the gap between that world and this one is not as obvious. Still, I +think it's necessary if the internet is to ever evolve past being, primarily, +a home for trolls. There are a number of traits of an up-and-coming DaaS which +would aid it in being accepted by the internet: + +* **Patience**: there is a critical mass of users and platforms using DaaS' + where it becomes more advantageous for platforms to use the DaaS than not. + Until then, the DaaS and platforms using it need to take deliberate but small + steps. For example: making DaaS usage optional for platform users, and giving + their accounts special marks to indicate they're "authentic" (like Twitter's + blue checkmark); giving those users' activity higher weight in algorithms; + allowing others to filter out activity of non-"authentic" users; etc... These + are all preliminary steps which can be taken which encourage but don't require + platform users to use a DaaS. + +* **User-friendly**: most likely the platforms using a DaaS are what are going + to be paying the bills. A successful DaaS will need to remember that, no + matter where the money comes from, if the users aren't happy they'll stop + using the DaaS, and platforms will be forced to switch to a different one or + stop using them altogether. User-friendliness means more than a nice + interface; it means actually caring for the users' interests, taking their + privacy and security seriously, and in all other aspects being on their side. + In that same vein, competition is important, and so... + +* **No country/government affiliation**: If the DaaS was to be run by a + government agency it would have no incentive to provide a good user + experience, since the users aren't paying the bills (they might not even be in + that country). A DaaS shouldn't be exclusive to any one government or country + anyway. 
Perhaps it starts out that way, to get off the ground, but ultimately + the internet is a global institution, and is healthiest when it's connecting + individuals _around the world_. A successful DaaS will reach beyond borders + and try to connect everyone. + +Obviously actually starting a DaaS would be a huge undertaking, and would +require proper management and good developers and all that, but such things +apply to most services. + +## Authorization + +The final aspect of identity management, which I haven't talked about yet, is +authorization. This aspect deals with what a particular identity is allowed to +do. For example, is an identity allowed to claim they have a particular name, or +are from a particular place, or are of a particular age? Other things like +administration and moderation privileges also fall under authorization, but they +are generally defined and managed within a platform. + +A DaaS has the potential to help with authorization as well, though with a giant +caveat. If a DaaS were to not fingerprint and destroy the user's data, like +their name and birthday and whatnot, but instead store them, then the following +use-case could also be implemented: + +* A platform wants to know if a user is above a certain age, let's say. It asks + the DaaS for that information. + +* The DaaS asks the user, OAuth style, whether the user is ok with giving the + platform that information. + +* If so, the platform is given that information. + +This is a tricky situation. It adds a lot of liability for the user, since their +raw data will be stored with the DaaS, ripe for hacking. It also places a lot of +trust with the DaaS to be responsible with users' data and not go giving it out +willy-nilly to others, and instead to only give out the bare-minimum that the +user allows. Since the user is not the DaaS' direct customer, this might be too +much to ask. Nevertheless, it's a use-case which is worth thinking about. 
+ +## Dapps + +The idea of decentralized applications, or dapps, has begun to gain traction. +While not mainstream yet, I think they have potential, and it's necessary to +discuss how a DaaS would operate in a world where the internet is no longer +hosted in central datacenters. + +Consider an Ethereum-based dapp. If a user were to register one ethereum address +(which are really public keys) with their DaaS account, the following use-case +could be implemented: + +* A charity dapp has an ethereum contract, which receives a call from an + ethereum address asking for money. The dapp wants to ensure every person it + sends money to hasn't received any that day. + +* The DaaS has a separate ethereum contract it manages, where it stores all + addresses which have been registered to a user. There is no need to keep any + other user information in the contract. + +* The charity dapp's contract calls the DaaS' contract, asking it if the address + is one of its addresses. If so, and if the charity contract hasn't given to + that address yet today, it can send money to that address. + +There would perhaps need to be some mechanism by which a user could change their +address, which would be complex since that address might be in use by a dapp +already, but it's likely a solvable problem. + +A charity dapp is a bit of a silly example; ideally with a charity dapp there'd +also be some mechanism to ensure a person actually _needs_ the money. But +there's other dapp ideas which would become feasible, due to the inability of a +person to impersonate many people, if DaaS use becomes normal. + +## Why Did I Write This? + +Perhaps you've gotten this far and are asking: "Clearly you've thought about +this a lot, why don't you make this yourself and make some phat stacks of cash +with a startup?" The answer is that this project would need to be started and +run by serious people, who can be dedicated and thorough and responsible. 
I'm +not sure I'm one of those people; I get distracted easily. But I would like to +see this idea tried, and so I've written this up thinking maybe someone else +would take the reins. + +I'm not asking for equity or anything, if you want to try; it's a free idea for +the taking. But if it turns out to be a bazillion dollar Good Idea™, I won't say +no to a donation... diff --git a/static/src/_posts/2018-11-12-viz-1.md b/static/src/_posts/2018-11-12-viz-1.md new file mode 100644 index 0000000..73c4cd9 --- /dev/null +++ b/static/src/_posts/2018-11-12-viz-1.md @@ -0,0 +1,55 @@ +--- +title: >- + Visualization 1 +description: >- + Using clojurescript and quil to generate interesting visuals +series: viz +git_repo: https://github.com/mediocregopher/viz.git +git_commit: v1 +tags: tech art +--- + +First I want to apologize if you've seen this already, I originally had this up +on my normal website, but I've decided to instead consolidate all my work to my +blog. + +This is the first of a series of visualization posts I intend to work on, each +building from the previous one. + + + + +

+ +This visualization follows a few simple rules: + +* Any point can only be occupied by a single node. A point may be alive (filled) + or dead (empty). + +* On every tick each live point picks from 0 to N new points to spawn, where N is + the number of empty adjacent points to it. If it picks 0, it becomes dead. + +* Each line indicates the parent of a point. Lines have an arbitrary lifetime of + a few ticks, and occupy the points they connect (so new points may not spawn + on top of a line). + +* When a dead point has no lines it is cleaned up, and its point is no longer + occupied. + +The resulting behavior is somewhere between [Conway's Game of +Life](https://en.wikipedia.org/wiki/Conway%27s_Game_of_Life) and white noise. +Though each point operates independently, they tend to move together in groups. +When two groups collide head on they tend to cancel each other out, killing most +of both. When they meet while both heading in a common direction they tend to +peacefully merge towards that direction. + +Sometimes their world becomes so cluttered there's hardly room to move. +Sometimes a major coincidence of events leads to multiple groups canceling each +other at once, opening up the world and allowing for an explosion of new growth. + +Some groups spiral about a single point, sustaining themselves and defending +from outside groups in the same movement. This doesn't last for very long. + +The performance of this visualization is not very optimized, and will probably +eat up your CPU like nothing else. Most of the slowness comes from drawing the +lines; since there's so many individual small ones it's quite cumbersome to do. diff --git a/static/src/_posts/2018-11-12-viz-2.md b/static/src/_posts/2018-11-12-viz-2.md new file mode 100644 index 0000000..de30d56 --- /dev/null +++ b/static/src/_posts/2018-11-12-viz-2.md @@ -0,0 +1,50 @@ +--- +title: >- + Visualization 2 +description: >- + Now in glorious technicolor! 
+series: viz +git_repo: https://github.com/mediocregopher/viz.git +git_commit: v2 +tags: tech art +--- + + + + + +

+ +This visualization builds on the previous. Structurally the cartesian grid has +been turned into an isometric one, but this is more of an environmental change +than a behavioral one. + +Behavioral changes which were made: + +* When a live point is deciding its next spawn points, it first sorts the set of + empty adjacent points from closest-to-the-center to farthest. It then chooses + a number `n` between `0` and `N` (where `N` is the sorted set's size) and + spawns new points from the first `n` points of the sorted set. `n` is chosen + based on: + + * The live point's linear distance from the center. + + * A random multiplier. + +* Each point is spawned with an attached color, where the color chosen is a + slightly different hue than its parent. The change is deterministic, so all + child points of the same generation have the same color. + +The second change is purely cosmetic, but does create a mesmerizing effect. The +first change alters the behavior dramatically. Only the points which compete for +the center are able to reproduce, but by the same token are more likely to be +starved out by other points doing the same. + +In the previous visualization the points moved around in groups aimlessly. Now +the groups are all competing for the same thing, the center. As a result they +congregate and are able to be viewed as a larger whole. + +The constant churn of the whole takes many forms, from a spiral in the center, +to waves crashing against each other, to outright chaos, to random purges of +nearly all points. Each form lasts for only a few seconds before giving way to +another. 
diff --git a/static/src/_posts/2019-08-02-program-structure-and-composability.md b/static/src/_posts/2019-08-02-program-structure-and-composability.md new file mode 100644 index 0000000..7add404 --- /dev/null +++ b/static/src/_posts/2019-08-02-program-structure-and-composability.md @@ -0,0 +1,588 @@ +--- +title: >- + Program Structure and Composability +description: >- + Discussing the nature of program structure, the problems presented by + complex structures, and a pattern that helps in solving those problems. +tags: tech +--- + +## Part 0: Introduction + +This post is focused on a concept I call “program structure,” which I will try +to shed some light on before discussing complex program structures. I will then +discuss why complex structures can be problematic to deal with, and will finally +discuss a pattern for dealing with those problems. + +My background is as a backend engineer working on large projects that have had +many moving parts; most had multiple programs interacting with each other, used +many different databases in various contexts, and faced large amounts of load +from millions of users. Most of this post will be framed from my perspective, +and will present problems in the way I have experienced them. I believe, +however, that the concepts and problems I discuss here are applicable to many +other domains, and I hope those with a foot in both backend systems and a second +domain can help to translate the ideas between the two. + +Also note that I will be using Go as my example language, but none of the +concepts discussed here are specific to Go. To that end, I’ve decided to favor +readable code over “correct” code, and so have elided things that most gophers +hold near-and-dear, such as error checking and proper documentation, in order to +make the code as accessible as possible to non-gophers as well. As with before, +I trust that someone with a foot in Go and another language can help me +translate between the two. 
+ +## Part 1: Program Structure + +In this section I will discuss the difference between directory and program +structure, show how global state is antithetical to compartmentalization (and +therefore good program structure), and finally discuss a more effective way to +think about program structure. + +### Directory Structure + +For a long time, I thought about program structure in terms of the hierarchy +present in the filesystem. In my mind, a program’s structure looked like this: + +``` +// The directory structure of a project called gobdns. +src/ + config/ + dns/ + http/ + ips/ + persist/ + repl/ + snapshot/ + main.go +``` + +What I grew to learn was that this conflation of “program structure” with +“directory structure” is ultimately unhelpful. While it can’t be denied that +every program has a directory structure (and if not, it ought to), this does not +mean that the way the program looks in a filesystem in any way corresponds to +how it looks in our mind’s eye. + +The most notable way to show this is to consider a library package. Here is the +structure of a simple web-app which uses redis (my favorite database) as a +backend: + +``` +src/ + redis/ + http/ + main.go +``` + +If I were to ask you, based on that directory structure, what the program does +in the most abstract terms, you might say something like: “The program +establishes an http server that listens for requests. It also establishes a +connection to the redis server. The program then interacts with redis in +different ways based on the http requests that are received on the server.” + +And that would be a good guess. Here’s a diagram that depicts the program +structure, wherein the root node, `main.go`, takes in requests from `http` and +processes them using `redis`. 
+ +{% include image.html + dir="program-structure" file="diag1.jpg" width=519 + descr="Example 1" + %} + +This is certainly a viable guess for how a program with that directory +structure operates, but consider another answer: “A component of the program +called `server` establishes an http server that listens for requests. `server` +also establishes a connection to a redis server. `server` then interacts with +that redis connection in different ways based on the http requests that are +received on the http server. Additionally, `server` tracks statistics about +these interactions and makes them available to other components. The root +component of the program establishes a connection to a second redis server, and +stores those statistics in that redis server.” Here’s another diagram to depict +_that_ program. + +{% include image.html + dir="program-structure" file="diag2.jpg" width=712 + descr="Example 2" + %} + +The directory structure could apply to either description; `redis` is just a +library which allows for interaction with a redis server, but it doesn’t +specify _which_ or _how many_ servers. However, those are extremely important +factors that are definitely reflected in our concept of the program’s +structure, and not in the directory structure. **What the directory structure +reflects are the different _kinds_ of components available to use, but it does +not reflect how a program will use those components.** + + +### Global State vs Compartmentalization + +The directory-centric view of structure often leads to the use of global +singletons to manage access to external resources like RPC servers and +databases. In examples 1 and 2 the `redis` library might contain code which +looks something like this: + +```go +// A mapping of connection names to redis connections. 
+var globalConns = map[string]*RedisConn{} + +func Get(name string) *RedisConn { + if globalConns[name] == nil { + globalConns[name] = makeRedisConnection(name) + } + return globalConns[name] +} +``` + +Even though this pattern would work, it breaks with our conception of the +program structure in more complex cases like example 2. Rather than the `redis` +component being owned by the `server` component, which actually uses it, it +would be practically owned by _all_ components, since all are able to use it. +Compartmentalization has been broken, and can only be held together through +sheer human discipline. + +**This is the problem with all global state. It is shareable among all +components of a program, and so is accountable to none of them.** One must look +at an entire codebase to understand how a globally held component is used, +which might not even be possible for a large codebase. Therefore, the +maintainers of these shared components rely entirely on the discipline of their +fellow coders when making changes, usually discovering where that discipline +broke down once the changes have been pushed live. + +Global state also makes it easier for disparate programs/components to share +datastores for completely unrelated tasks. In example 2, rather than creating a +new redis instance for the root component’s statistics storage, the coder might +have instead said, “well, there’s already a redis instance available, I’ll just +use that.” And so, compartmentalization would have been broken further. Perhaps +the two instances _could_ be coalesced into the same instance for the sake of +resource efficiency, but that decision would be better made at runtime via the +configuration of the program, rather than being hardcoded into the code. + +From the perspective of team management, global state-based patterns do nothing +except slow teams down. 
The person/team responsible for maintaining the central +library in which shared components live (`redis`, in the above examples) +becomes the bottleneck for creating new instances for new components, which +will further lead to re-using existing instances rather than creating new ones, +further breaking compartmentalization. Additionally the person/team responsible +for the central library, rather than the team using it, often finds themselves +as the maintainers of the shared resource. + +### Component Structure + +So what does proper program structure look like? In my mind the structure of a +program is a hierarchy of components, or, in other words, a tree. The leaf +nodes of the tree are almost _always_ IO related components, e.g., database +connections, RPC server frameworks or clients, message queue consumers, etc. +The non-leaf nodes will _generally_ be components that bring together the +functionalities of their children in some useful way, though they may also have +some IO functionality of their own. + +Let's look at an even more complex structure, still only using the `redis` and +`http` component types: + +{% include image.html + dir="program-structure" file="diag3.jpg" width=729 + descr="Example 3" + %} + +This component structure contains the addition of the `debug` component. +Clearly the `http` and `redis` components are reusable in different contexts, +but for this example the `debug` endpoint is as well. It creates a separate +http server that can be queried to perform runtime debugging of the program, +and can be tacked onto virtually any program. The `rest-api` component is +specific to this program and is therefore not reusable. Let’s dive into it a +bit to see how it might be implemented: + +```go +// RestAPI is very much not thread-safe, hopefully it doesn't have to handle +// more than one request at once. 
+type RestAPI struct { + redisConn *redis.RedisConn + httpSrv *http.Server + + // Statistics exported for other components to see + RequestCount int + FooRequestCount int + BarRequestCount int +} + +func NewRestAPI() *RestAPI { + r := new(RestAPI) + r.redisConn = redis.NewConn("127.0.0.1:6379") + + // mux will route requests to different handlers based on their URL path. + mux := http.NewServeMux() + mux.HandleFunc("/foo", r.fooHandler) + mux.HandleFunc("/bar", r.barHandler) + r.httpSrv = http.NewServer(mux) + + // Listen for requests and serve them in the background. + go r.httpSrv.Listen(":8000") + + return r +} + +func (r *RestAPI) fooHandler(rw http.ResponseWriter, req *http.Request) { + r.redisConn.Command("INCR", "fooKey") + r.RequestCount++ + r.FooRequestCount++ +} + +func (r *RestAPI) barHandler(rw http.ResponseWriter, req *http.Request) { + r.redisConn.Command("INCR", "barKey") + r.RequestCount++ + r.BarRequestCount++ +} +``` + + +In that snippet `rest-api` coalesced `http` and `redis` into a simple REST-like +api using pre-made library components. `main.go`, the root component, does much +the same: + +```go +func main() { + // Create debug server and start listening in the background + debugSrv := debug.NewServer() + + // Set up the RestAPI, this will automatically start listening + restAPI := NewRestAPI() + + // Create another redis connection and use it to store statistics + statsRedisConn := redis.NewConn("127.0.0.1:6380") + for { + time.Sleep(1 * time.Second) + statsRedisConn.Command("SET", "numReqs", restAPI.RequestCount) + statsRedisConn.Command("SET", "numFooReqs", restAPI.FooRequestCount) + statsRedisConn.Command("SET", "numBarReqs", restAPI.BarRequestCount) + } +} +``` + +One thing that is clearly missing in this program is proper configuration, +whether from command-line or environment variables, etc. As it stands, all +configuration parameters, such as the redis addresses and http listen +addresses, are hardcoded. 
Proper configuration actually ends up being somewhat +difficult, as the ideal case would be for each component to set up its own +configuration variables without its parent needing to be aware. For example, +`redis` could set up `addr` and `pool-size` parameters. The problem is that there +are two `redis` components in the program, and their parameters would therefore +conflict with each other. An elegant solution to this problem is discussed in +the next section. + +## Part 2: Components, Configuration, and Runtime + +The key to the configuration problem is to recognize that, even if there are +two of the same component in a program, they can’t occupy the same place in the +program’s structure. In the above example, there are two `http` components: one +under `rest-api` and the other under `debug`. Because the structure is +represented as a tree of components, the “path” of any node in the tree +uniquely represents it in the structure. For example, the two `http` components +in the previous example have these paths: + +``` +root -> rest-api -> http +root -> debug -> http +``` + +If each component were to know its place in the component tree, then it would +easily be able to ensure that its configuration and initialization didn’t +conflict with other components of the same type. If the `http` component sets +up a command-line parameter to know what address to listen on, the two `http` +components in that program would set up: + +``` +--rest-api-listen-addr +--debug-listen-addr +``` + +So how can we enable each component to know its path in the component structure? +To answer this, we’ll have to take a detour through a type, called `Component`. + +### Component and Configuration + +The `Component` type is a made-up type (though you’ll be able to find an +implementation of it at the end of this post). It has a single primary purpose, +and that is to convey the program’s structure to new components. 
+ +To see how this is done, let's look at a couple of `Component`'s methods: + +```go +// Package mcmp + +// New returns a new Component which has no parents or children. It is therefore +// the root component of a component hierarchy. +func New() *Component + +// Child returns a new child of the called upon Component. +func (*Component) Child(name string) *Component + +// Path returns the Component's path in the component hierarchy. It will return +// an empty slice if the Component is the root component. +func (*Component) Path() []string +``` + +`Child` is used to create a new `Component`, corresponding to a new child node +in the component structure, and `Path` is used to retrieve the path of any +`Component` within that structure. For the sake of keeping the examples simple, +let’s pretend these functions have been implemented in a package called `mcmp`. +Here’s an example of how `Component` might be used in the `redis` component’s +code: + +```go +// Package redis + +func NewConn(cmp *mcmp.Component, defaultAddr string) *RedisConn { + cmp = cmp.Child("redis") + paramPrefix := strings.Join(cmp.Path(), "-") + + addrParam := flag.String(paramPrefix+"-addr", defaultAddr, "Address of redis instance to connect to") + // finish setup + + return redisConn +} +``` + +In our above example, the two `redis` components' parameters would be: + +``` +// This first parameter is for the stats redis, whose parent is the root and +// therefore doesn't have a prefix. Perhaps stats should be broken into its own +// component in order to fix this. +--redis-addr +--rest-api-redis-addr +``` + +`Component` definitely makes it easier to instantiate multiple redis components +in our program, since it allows them to know their place in the component +structure. + +Having to construct the prefix for the parameters ourselves is pretty annoying, +so let’s introduce a new package, `mcfg`, which acts like `flag` but is aware +of `Component`. 
Then `redis.NewConn` is reduced to: + +```go +// Package redis + +func NewConn(cmp *mcmp.Component, defaultAddr string) *RedisConn { + cmp = cmp.Child("redis") + addrParam := mcfg.String(cmp, "addr", defaultAddr, "Address of redis instance to connect to") + // finish setup + + return redisConn +} +``` + +Easy-peasy. + +#### But What About Parse? + +Sharp-eyed gophers will notice that there is a key piece missing: When is +`flag.Parse`, or its `mcfg` counterpart, called? When does `addrParam` actually +get populated? It can’t happen inside `redis.NewConn` because there might be +other components after `redis.NewConn` that want to set up parameters. To +illustrate the problem, let’s look at a simple program that wants to set up two +`redis` components: + +```go +func main() { + // Create the root Component, an empty Component. + cmp := mcmp.New() + + // Create the Components for two sub-components, foo and bar. + cmpFoo := cmp.Child("foo") + cmpBar := cmp.Child("bar") + + // Now we want to try to create a redis sub-component for each component. + + // This will set up the parameter "--foo-redis-addr", but bar hasn't had a + // chance to set up its corresponding parameter, so the command-line can't + // be parsed yet. + fooRedis := redis.NewConn(cmpFoo, "127.0.0.1:6379") + + // This will set up the parameter "--bar-redis-addr", but, as mentioned + // before, redis.NewConn can't parse command-line. + barRedis := redis.NewConn(cmpBar, "127.0.0.1:6379") + + // It is only after all components have been instantiated that the + // command-line arguments can be parsed + mcfg.Parse() +} +``` + +While this solves our argument parsing problem, fooRedis and barRedis are not +usable yet because the actual connections have not been made. This is a classic +chicken and the egg problem. The func `redis.NewConn` needs to make a connection +which it cannot do until _after_ `mcfg.Parse` is called, but `mcfg.Parse` cannot +be called until after `redis.NewConn` has returned. 
We will solve this problem +in the next section. + +### Instantiation vs Initialization + +Let’s break down `redis.NewConn` into two phases: instantiation and +initialization. Instantiation refers to creating the component on the component +structure and having it declare what it needs in order to initialize (e.g., +configuration parameters). During instantiation, nothing external to the +program is performed; no IO, no reading of the command-line, no logging, etc. +All that’s happened is that the empty template of a `redis` component has been +created. + +Initialization is the phase during which the template is filled in. +Configuration parameters are read, startup actions like the creation of database +connections are performed, and logging is output for informational and debugging +purposes. + +The key to making effective use of this dichotomy is to allow _all_ components +to instantiate themselves before they initialize themselves. By doing this we +can ensure, for example, that all components have had the chance to declare +their configuration parameters before configuration parsing is done. + +So let’s modify `redis.NewConn` so that it follows this dichotomy. It makes +sense to leave instantiation-related code where it is, but we need a mechanism +by which we can declare initialization code before actually calling it. For +this, I will introduce the idea of a “hook.” + +#### But First: Augment Component + +In order to support hooks, however, `Component` will need to be augmented with +a few new methods. Right now, it can only carry with it information about the +component structure, but here we will add the ability to carry arbitrary +key/value information as well: + +```go +// Package mcmp + +// SetValue sets the given key to the given value on the Component, overwriting +// any previous value for that key. 
+func (*Component) SetValue(key, value interface{}) + +// Value returns the value which has been set for the given key, or nil if the +// key was never set. +func (*Component) Value(key interface{}) interface{} + +// Children returns the Component's children in the order they were created. +func (*Component) Children() []*Component +``` + +The final method allows us to, starting at the root `Component`, traverse the +component structure and interact with each `Component`’s key/value store. This +will be useful for implementing hooks. + +#### Hooks + +A hook is simply a function that will run later. We will declare a new package, +calling it `mrun`, and say that it has two new functions: + +```go +// Package mrun + +// InitHook registers the given hook to the given Component. +func InitHook(cmp *mcmp.Component, hook func()) + +// Init runs all hooks registered using InitHook. Hooks are run in the order +// they were registered. +func Init(cmp *mcmp.Component) +``` + +With these two functions, we are able to defer the initialization phase of +startup by using the same `Components` we were passing around for the purpose +of denoting component structure. + +Now, with these few extra pieces of functionality in place, let’s reconsider the +most recent example, and make a program that creates two redis components which +exist independently of each other: + +```go +// Package redis + +// NOTE that NewConn has been renamed to InstConn, to reflect that the returned +// *RedisConn is merely instantiated, not initialized. + +func InstConn(cmp *mcmp.Component, defaultAddr string) *RedisConn { + cmp = cmp.Child("redis") + + // we instantiate an empty RedisConn instance and parameters for it. Neither + // has been initialized yet. They will remain empty until initialization has + // occurred. 
+ redisConn := new(RedisConn) + addrParam := mcfg.String(cmp, "addr", defaultAddr, "Address of redis instance to connect to") + + mrun.InitHook(cmp, func() { + // This hook will run after parameter initialization has happened, and + // so addrParam will be usable. Once this hook has run, redisConn will be + // usable as well. + *redisConn = makeRedisConnection(*addrParam) + }) + + // Now that cmp has had configuration parameters and initialization hooks + // set into it, return the empty redisConn instance back to the parent. + return redisConn +} +``` + +```go +// Package main + +func main() { + // Create the root Component, an empty Component. + cmp := mcmp.New() + + // Create the Components for two sub-components, foo and bar. + cmpFoo := cmp.Child("foo") + cmpBar := cmp.Child("bar") + + // Add redis components to each of the foo and bar sub-components. + redisFoo := redis.InstConn(cmpFoo, "127.0.0.1:6379") + redisBar := redis.InstConn(cmpBar, "127.0.0.1:6379") + + // Parse will descend into the Component and all of its children, + // discovering all registered configuration parameters and filling them from + // the command-line. + mcfg.Parse(cmp) + + // Now that configuration parameters have been initialized, run the Init + // hooks for all Components. + mrun.Init(cmp) + + // At this point the redis components have been fully initialized and may be + // used. For this example we'll copy all keys from one to the other. + keys := redisFoo.Command("KEYS", "*") + for i := range keys { + val := redisFoo.Command("GET", keys[i]) + redisBar.Command("SET", keys[i], val) + } +} +``` + +## Conclusion + +While the examples given here are fairly simplistic, the pattern itself is quite +powerful. Codebases naturally accumulate small, domain-specific behaviors and +optimizations over time, especially around the IO components of the program. 
+Databases are used with specific options that an organization finds useful, +logging is performed in particular places, metrics are counted around certain +pieces of code, etc. + +By programming with component structure in mind, we are able to keep these +optimizations while also keeping the clarity and compartmentalization of the +code intact. We can keep our code flexible and configurable, while also +re-usable and testable. Also, the simplicity of the tools involved means they +can be extended and retrofitted for nearly any situation or use-case. + +Overall, this is a powerful pattern that I’ve found myself unable to do without +once I began using it. + +### Implementation + +As a final note, you can find an example implementation of the packages +described in this post here: + +* [mcmp](https://godoc.org/github.com/mediocregopher/mediocre-go-lib/mcmp) +* [mcfg](https://godoc.org/github.com/mediocregopher/mediocre-go-lib/mcfg) +* [mrun](https://godoc.org/github.com/mediocregopher/mediocre-go-lib/mrun) + +The packages are not stable and are likely to change frequently. You’ll also +find that they have been extended quite a bit from the simple descriptions found +here, based on what I’ve found useful as I’ve implemented programs using +component structures. With these two points in mind, I would encourage you to +look and take whatever functionality you find useful for yourself, and not use +the packages directly. The core pieces are not different from what has been +described in this post. diff --git a/static/src/_posts/2020-04-26-trading-in-the-rain.md b/static/src/_posts/2020-04-26-trading-in-the-rain.md new file mode 100644 index 0000000..5f2dbaa --- /dev/null +++ b/static/src/_posts/2020-04-26-trading-in-the-rain.md @@ -0,0 +1,56 @@ +--- +title: >- + Trading in the Rain +description: >- + All those... gains... will be lost like... tears... +tags: tech art crypto +--- + + + + + + + + + + + + + + + + + + + + + + + + + + +
+For each pair listed below, live trade data will be pulled down from the +Cryptowat.ch Websocket +API and used to generate musical rain drops. The price of each trade +determines both the musical note and position of the rain drop on the screen, +while the volume of each trade determines how long the note is held and how big +the rain drop is. + +

Pairs to be generated, by color:

+ + +

+ + +
+ + + diff --git a/static/src/_posts/2020-05-30-denver-protests.md b/static/src/_posts/2020-05-30-denver-protests.md new file mode 100644 index 0000000..710987f --- /dev/null +++ b/static/src/_posts/2020-05-30-denver-protests.md @@ -0,0 +1,161 @@ +--- +title: >- + Denver Protests +description: >- + Craziness +--- + +# Saturday, May 30th + +We went to the May 30th protest at Civic Center Park. We were there for a few +hours during the day, leaving around 4pm. I would describe the character of the +protest as being energetic, angry, but contained. A huge crowd moved in and +around civic center, chanting and being rowdy, but clearly was being led. + +After a last hurrah at the pavilion it seemed that the organized event was +"over". We stayed a while longer, and eventually headed back home. I don't feel +that people really left the park at the same time we did; mostly everyone just +dispersed around the park and found somewhere to keep hanging out. + +Tonight there has been an 8pm curfew. The police lined up on the north side of +the park, armored and clearly ready for action. We watched all of this on the +live news stations, gritting our teeth through the commentary of their reporters. +As the police stood there, the clock counting down to 8, the protesters grew +more and more irritated. They taunted the police, and formed a line of their +own. The braver (or more dramatic) protesters walked around in the no-man's land +between them, occasionally earning themselves some teargas. + +The police began pushing forward just before 8 a little, but began pushing in +earnest just after 8, after the howling. They would advance, wait, advance, wait +again. An armada of police cars, ambulance, and fire trucks followed the line as +it advanced. + +The police did not give the protesters anywhere to go except into Capitol Hill, +southeast of Civic Center Park. We watched as a huge crowd marched past the +front of our house, chanting their call and response: "What's his name?" 
"GEORGE +FLOYD". The feeling wasn't of violence still, just anger. Indignant at a curfew +aimed at quelling a movement, the protesters simply kept moving. The police were +never far behind. + +We sat on our front stoop with our neighbors and watched the night unfold. I +don't think a single person in our building or the buildings to the left and +right of us hadn't gone to protest today in some capacity. We came back from our +various outings and sat out front, watching the crowds and patrolling up and +down the street to keep tabs on things. + +Around 9pm the fires started. We saw them on the news, and in person. They were +generally dumpster fires, generally placed such that they were away from +buildings, clearly being done more to be annoying than to accomplish anything +specific. A very large set of fires was started a block south of us, in the +middle of the street. The fire department was there within a few minutes to put +those out, before moving on. + +From the corner of my eye, sitting back on the stoop, I noticed our neighbors +running into their backyard. We ran after them, and they told us there was a +dumpster fire in our alley. They were running with fire extinguishers, and we +ran inside to grab some of our own. By the time we got to the backyard the fire +was only smouldering, and the fire department was coming down the alley. We +scurried back into the backyard. A few minutes later I peeked my head around the +corner, into the alley, to see what happening. I was greeted by at least two +police in riot gear, guarding the dumpster as the fire department worked. They +saw me but didn't move, and I quickly retreated back to the yard. + +Talking to our neighbor later we found out she had seen a group of about 10 +people back there, and watched them jump the fence into another backyard in +order to escape the alley. She thinks they, or some subset of them, started the +fire. 
She looked one in the eye, she says, and didn't get the impression they +were trying to cause damage, just to make a statement. + +The fires stopped not long after that, it seems. We're pretty sure the fire +trucks were just driving up and down the main roads, looking into alleys and +stopping all fires they could find. In all this time the police didn't do much. +They would hold a line, but never chase anyone. Even now, as I write this around +midnight, people are still out, meandering around in small groups, and police +are present but not really doing anything. + +It's hard to get a good view of everything though. All we have is livestreams on +youtube to go on at this point. There's a couple intrepid amateur reporters out +there, getting into the crowds and streaming events as they happen. Right now +we're watching people moving down Lincoln towards Civic Center Park, some of +them trying to smash windows of buildings as they go. + +The violence of these protests is going to be the major story of tonight, I know +that already. That I know of there's been 3 police injured, some broken +windows, and quite a bit of graffiti. I do believe the tactic of pushing +everyone into Cap Hill had the desired effect of reducing looting (again, as far +as I can tell so far), but at the expense of those who live here who have to +endure latent tear gas, dumpster fires, and sirens all through the night. + +Even now, at midnight, from what I can see from my porch and from these live +streams, the protesters are not violent. At worst they are guilty of a lot of +loitering. The graffiti, the smashed windows, the injured officers, all of these +things will be held up as examples of the anarchy and violence inherent to the +protesters. But I don't think that's an honest picture. The vast, vast majority +of those out right now are civilly disobeying an unjust curfew, trying to keep +the energy of the movement alive. + +My thoughts about these things are complicated. 
When turning a corner on the +street I'm far more afraid to see the police than to see other protesters. The +fires have been annoying, and stupid, and unhelpful, but were never threatening. +The violence is stupid, though I don't shed many tears for a looted Chili's or +Papa Johns. The police have actually shown more restraint than I expected in all +of this, though funneling the protest into a residential neighborhood was an +incredibly stupid move. Could the protesters not have just stayed in the park? +Yes, the park would likely have been turned into an encampment, but it was +already heading into that direction due to Covid-19. Overall, this night didn't +need to be so hard, but Denver handled this well. + +But, it's only 1am, and the night has a long way to go. Things could still get +worse. Even now I'm watching people trying to break into the supreme court +building. Civic Center Park appears to be very populated again, and the police +are very present there again. It's possible I may eat my words. + +# Monday, June 1st + +Yesterday was quite a bit more tame than the craziness Saturday. I woke up +Sunday morning feeling antsy, and rode my bike around to see the damage. I had a +long conversation with a homeless man named Gary in Civic Center Park. He was +pissed, and had a lot to say about the "suburban kids" destroying the park he +and many others live in, causing it to be shut down and tear gassed. The +protesters saw it as a game, according to him, but it was life and death for the +homeless; three of his guys got beat up in the street, and neither police nor +protesters stopped it. + +Many people had shown up to the park early to help clean it up. Apart from the +graffiti, which was also in the process of being cleaned, it was hard to tell +anything had actually happened. Gary had some words about them as well, that +they were only there for the gram and some pats on the back, but once they left +his life would be back as it was. 
I could feel that, but I also appreciated that +people were cognizant that damage was being done and were willing to do +something about it. + +I rode around 16th street mall, down colfax, and back up 13th, looking to see if +anything had happened. For the most part there was no damage, save the graffiti. +A mediterranean restaurant got its windows smashed, as well as the Office Depot. +The restaurant was unfortunate, Office Depot will be ok. + +The protest yesterday was much more peaceful. The cops were nowhere to be found +when curfew hit, but did eventually show up when the protest moved down Colfax. +They had lined the streets around their precinct building there, but for the +most part the protesters just kept walking. This is when the "violence" started. +The cops moved into the street, forming a line across Colfax behind the +protesters. Police cars and vans started moving. As the protest turned back, +presumably to head back to the capitol lawn, it ran into the riot line. + +Predictably, everyone scattered. The cat-and-mouse game had begun, which meant +dumpster fires, broken windows, tear gas, and all the rest. Watching the whole +thing it was extremely clear to us, though not the news casters, unfortunately, +that if the police hadn't moved out into Colfax nothing would have ever +happened. Instead, the news casters lamented that people were bringing things +like helmets, gas masks, traffic cones, shields, etc... and so were clearly not there +"for the right reasons". + +The thing that the news casters couldn't seem to grasp was that the police +attempting to control these situations are what are catalyzing them in the first +place. These are protests _against_ the police, they cannot take place under the +terms the police choose. If the police were not here setting terms, but instead +working with the peaceful protesters (the vast, vast majority) to quell the +violence, no one would be here with helmets, gas masks, traffic cones, +shields... 
But instead the protesters feel they need to protect themselves in +order to be heard, and the police feel they have to exercise their power to +maintain control, and so the situation degrades. diff --git a/static/src/_posts/2020-07-07-viz-3.md b/static/src/_posts/2020-07-07-viz-3.md new file mode 100644 index 0000000..7f5280d --- /dev/null +++ b/static/src/_posts/2020-07-07-viz-3.md @@ -0,0 +1,155 @@ +--- +title: >- + Visualization 3 +description: >- + All the pixels. +series: viz +tags: tech art +--- + + + +This visualization is built from the ground up. On every frame a random set of +pixels is chosen. Each chosen pixel calculates the average of its color and the +color of a random neighbor. Some random color drift is added in as well. It +replaces its own color with that calculated color. + +Choosing a neighbor is done using the "asteroid rule", ie a pixel at the very +top row is considered to be the neighbor of the pixel on the bottom row of the +same column. + +Without the asteroid rule the pixels would all eventually converge into a single +uniform color, generally a light blue, due to the colors at the edge, the reds, +being quickly averaged away. With the asteroid rule in place the canvas has no +edges, thus no position on the canvas is favored and balance can be maintained. + + diff --git a/static/src/_posts/2020-11-16-component-oriented-programming.md b/static/src/_posts/2020-11-16-component-oriented-programming.md new file mode 100644 index 0000000..64ac091 --- /dev/null +++ b/static/src/_posts/2020-11-16-component-oriented-programming.md @@ -0,0 +1,353 @@ +--- +title: >- + Component-Oriented Programming +description: >- + A concise description of. +tags: tech +--- + +[A previous post in this +blog](/2019/08/02/program-structure-and-composability.html) focused on a +framework developed to make designing component-based programs easier. In +retrospect, the proposed pattern/framework was over-engineered. 
This post +attempts to present the same ideas in a more distilled form, as a simple +programming pattern and without the unnecessary framework. + +## Components + +Many languages, libraries, and patterns make use of a concept called a +"component," but in each case the meaning of "component" might be slightly +different. Therefore, to begin talking about components, it is necessary to first +describe what is meant by "component" in this post. + +For the purposes of this post, the properties of components include the +following. + + 1... **Abstract**: A component is an interface consisting of one or more +methods. + +   1a... A function might be considered a single-method component +_if_ the language supports first-class functions. + +   1b... A component, being an interface, may have one or more +implementations. Generally, there will be a primary implementation, which is +used during a program's runtime, and secondary "mock" implementations, which are +only used when testing other components. + + 2... **Instantiatable**: An instance of a component, given some set of +parameters, can be instantiated as a standalone entity. More than one of the +same component can be instantiated, as needed. + + 3... **Composable**: A component may be used as a parameter of another +component's instantiation. This would make it a child component of the one being +instantiated (the parent). + + 4... **Pure**: A component may not use mutable global variables (i.e., +singletons) or impure global functions (e.g., system calls). It may only use +constants and variables/components given to it during instantiation. + + 5... **Ephemeral**: A component may have a specific method used to clean +up all resources that it's holding (e.g., network connections, file handles, +language-specific lightweight threads, etc.). + +   5a... This cleanup method should _not_ clean up any child +components given as instantiation parameters. + +   5b... 
This cleanup method should not return until the +component's cleanup is complete. + +   5c... A component should not be cleaned up until all its +parent components are cleaned up. + +Components are composed together to create component-oriented programs. This is +done by passing components as parameters to other components during +instantiation. The `main` procedure of the program is responsible for +instantiating and composing the components of the program. + +## Example + +It's easier to show than to tell. This section posits a simple program and then +describes how it would be implemented in a component-oriented way. The program +chooses a random number and exposes an HTTP interface that allows users to try +and guess that number. The following are requirements of the program: + +* A guess consists of a name that identifies the user performing the guess and + the number that is being guessed; + +* A score is kept for each user who has performed a guess; + +* Upon an incorrect guess, the user should be informed of whether they guessed + too high or too low, and 1 point should be deducted from their score; + +* Upon a correct guess, the program should pick a new random number against + which to check subsequent guesses, and 1000 points should be added to the + user's score; + +* The HTTP interface should have two endpoints: one for users to submit guesses, + and another that lists out user scores from highest to lowest; + +* Scores should be saved to disk so they survive program restarts. + +It seems clear that there will be two major areas of functionality for our +program: score-keeping and user interaction via HTTP. Each of these can be +encapsulated into components called `scoreboard` and `httpHandlers`, +respectively. + +`scoreboard` will need to interact with a filesystem component to save/restore +scores (because it can't use system calls directly; see property 4). 
It would be +wasteful for `scoreboard` to save the scores to disk on every score update, so +instead it will do so every 5 seconds. A time component will be required to +support this. + +`httpHandlers` will be choosing the random number which is being guessed, and +will therefore need a component that produces random numbers. `httpHandlers` +will also be recording score changes to `scoreboard`, so it will need access to +`scoreboard`. + +The example implementation will be written in go, which makes differentiating +HTTP handler functionality from the actual HTTP server quite easy; thus, there +will be an `httpServer` component that uses `httpHandlers`. + +Finally, a `logger` component will be used in various places to log useful +information during runtime. + +[The example implementation can be found +here.](/assets/component-oriented-design/v1/main.html) While most of it can be +skimmed, it is recommended to at least read through the `main` function to see +how components are composed together. Note that `main` is where all components +are instantiated, and that all components take in their child components as +part of their instantiation. + +## DAG + +One way to look at a component-oriented program is as a directed acyclic graph +(DAG), where each node in the graph represents a component, and each edge +indicates that one component depends upon another component for instantiation. +For the previous program, it's quite easy to construct such a DAG just by +looking at `main`, as in the following: + +``` +net.Listener rand.Rand os.File + ^ ^ ^ + | | | + httpServer --> httpHandlers --> scoreboard --> time.Ticker + | | | + +---------------+---------------+--> log.Logger +``` + +Note that all the leaves of the DAG (i.e., nodes with no children) describe the +points where the program meets the operating system via system calls. The leaves +are, in essence, the program's interface with the outside world. 
+ +While it's not necessary to actually draw out the DAG for every program one +writes, it can be helpful to at least think about the program's structure in +these terms. + +## Benefits + +Looking at the previous example implementation, one would be forgiven for having +the immediate reaction of "This seems like a lot of extra work for little gain. +Why can't I just make the system calls where I need to, and not bother with +wrapping them in interfaces and all these other rules?" + +The following sections will answer that concern by showing the benefits gained +by following a component-oriented pattern. + +### Testing + +Testing is important, that much is being assumed. + +A distinction to be made with testing is between unit and non-unit tests. Unit +tests are those for which there are no requirements for the environment outside +the test, such as the existence of global variables, running databases, +filesystems, or network services. Unit tests do not interact with the world +outside the testing procedure, but instead use mocks in place of the +functionality that would be expected by that world. + +Unit tests are important because they are faster to run and more consistent than +non-unit tests. Unit tests also force the programmer to consider different +possible states of a component's dependencies during the mocking process. + +Unit tests are often not employed by programmers, because they are difficult to +implement for code that does not expose any way to swap out dependencies for +mocks of those dependencies. The primary culprit of this difficulty is the +direct usage of singletons and impure global functions. For component-oriented +programs, all components inherently allow for the swapping out of any +dependencies via their instantiation parameters, so there's no extra effort +needed to support unit tests. 
+ +[Tests for the example implementation can be found +here.](/assets/component-oriented-design/v1/main_test.html) Note that all +dependencies of each component being tested are mocked/stubbed next to them. + +### Configuration + +Practically all programs require some level of runtime configuration. This may +take the form of command-line arguments, environment variables, configuration +files, etc. + +For a component-oriented program, all components are instantiated in the same +place, `main`, so it's very easy to expose any arbitrary parameter to the user +via configuration. For any component that is affected by a configurable +parameter, that component merely needs to take an instantiation parameter for +that configurable parameter; `main` can connect the two together. This accounts +for the unit testing of a component with different configurations, while still +allowing for the configuration of any arbitrary internal functionality. + +For more complex configuration systems, it is also possible to implement a +`configuration` component that wraps whatever configuration-related +functionality is needed, which other components use as a sub-component. The +effect is the same. + +To demonstrate how configuration works in a component-oriented program, the +example program's requirements will be augmented to include the following: + +* The point change values for both correct and incorrect guesses (currently + hardcoded at 1000 and 1, respectively) should be configurable on the + command-line; + +* The save file's path, HTTP listen address, and save interval should all be + configurable on the command-line. + +[The new implementation, with newly configurable parameters, can be found +here.](/assets/component-oriented-design/v2/main.html) Most of the program has +remained the same, and all unit tests from before remain valid. 
The primary +difference is that `scoreboard` takes in two new parameters for the point change +values, and configuration is set up inside `main` using the `flag` package. + +### Setup/Runtime/Cleanup + +A program can be split into three stages: setup, runtime, and cleanup. Setup is +the stage during which the internal state is assembled to make runtime possible. +Runtime is the stage during which a program's actual function is being +performed. Cleanup is the stage during which the runtime stops and internal +state is disassembled. + +A graceful (i.e., reliably correct) setup is quite natural to accomplish for +most. On the other hand, a graceful cleanup is, unfortunately, not a programmer's +first concern (if it is a concern at all). + +When building reliable and correct programs, a graceful cleanup is as important +as a graceful setup and runtime. A program is still running while it is being +cleaned up, and it's possibly still acting on the outside world. Shouldn't +it behave correctly during that time? + +Achieving a graceful setup and cleanup with components is quite simple. + +During setup, a single-threaded procedure (`main`) first constructs the leaf +components, then the components that take those leaves as parameters, then the +components that take _those_ as parameters, and so on, until the component DAG +is fully constructed. + +At this point, the program's runtime has begun. + +Once the runtime is over, signified by a process signal or some other mechanism, +it's only necessary to call each component's cleanup method (if any; see +property 5) in the reverse of the order in which the components were +instantiated. This order is inherently deterministic, as the components were +instantiated by a single-threaded procedure.
+ +Inherent to this pattern is the fact that each component will certainly be +cleaned up before any of its child components, as its child components must have +been instantiated first, and a component will not clean up child components +given as parameters (properties 5a and 5c). Therefore, the pattern avoids +use-after-cleanup situations. + +To demonstrate a graceful cleanup in a component-oriented program, the example +program's requirements will be augmented to include the following: + +* The program will terminate itself upon an interrupt signal; + +* During termination (cleanup), the program will save the latest set of scores + to disk one final time. + +[The new implementation that accounts for these new requirements can be found +here.](/assets/component-oriented-design/v3/main.html) For this example, go's +`defer` feature could have been used instead, which would have been even +cleaner, but was omitted for the sake of those using other languages. + + +## Conclusion + +The component pattern helps make programs more reliable with only a small amount +of extra effort incurred. In fact, most of the pattern has to do with +establishing sensible abstractions around global functionality and remembering +certain idioms for how those abstractions should be composed together, something +most of us already do to some extent anyway. + +While beneficial in many ways, component-oriented programming is merely a tool +that can be applied in many cases. It is certain that there are cases where it +is not the right tool for the job, so apply it deliberately and intelligently. + +## Criticisms/Questions + +In lieu of a FAQ, I will attempt to premeditate questions and criticisms of the +component-oriented programming pattern laid out in this post. + +**This seems like a lot of extra work.** + +Building reliable programs is a lot of work, just as building a +reliable _anything_ is a lot of work. 
Many of us work in an industry that likes +to balance reliability (sometimes referred to by the more specious "quality") +with malleability and deliverability, which naturally leads to skepticism of any +suggestions requiring more time spent on reliability. This is not necessarily a +bad thing, it's just how the industry functions. + +All that said, a pattern need not be followed perfectly to be worthwhile, and +the amount of extra work incurred by it can be decided based on practical +considerations. I merely maintain that code which is (mostly) component-oriented +is easier to maintain in the long run, even if it might be harder to get off the +ground initially. + +**My language makes this difficult.** + +I don't know of any language which makes this pattern particularly easier than +others, so, unfortunately, we're all in the same boat to some extent (though I +recognize that some languages, or their ecosystems, make it more difficult than +others). It seems to me that this pattern shouldn't be unbearably difficult for +anyone to implement in any language either, however, as the only language +feature required is abstract typing. + +It would be nice to one day see a language that explicitly supports this +pattern by baking the component properties in as compiler-checked rules. + +**My `main` is too big** + +There's no law saying all component construction needs to happen in `main`, +that's just the most sensible place for it. If there are large sections of your +program that are independent of each other, then they could each have their own +construction functions that `main` then calls. + +Other questions that are worth asking include: Can my program be split up +into multiple programs? Can the responsibilities of any of my components be +refactored to reduce the overall complexity of the component DAG? Can the +instantiation of any components be moved within their parent's +instantiation function? 
+ +(This last suggestion may seem to be disallowed, but is fine as long as the +parent's instantiation function remains pure.) + +**Won't this result in over-abstraction?** + +Abstraction is a necessary tool in a programmer's toolkit, there is simply no +way around it. The only questions are "how much?" and "where?" + +The use of this pattern does not affect how those questions are answered, in my +opinion, but instead aims to more clearly delineate the relationships and +interactions between the different abstracted types once they've been +established using other methods. Over-abstraction is possible and avoidable +regardless of which language, pattern, or framework is being used. + +**Does CoP conflict with object-oriented or functional programming?** + +I don't think so. OoP languages will have abstract types as part of their core +feature-set; most difficulties are going to be with deliberately _not_ using +other features of an OoP language, and with imported libraries in the language +perhaps making life inconvenient by not following CoP (specifically regarding +cleanup and the use of singletons). + +For functional programming, it may well be that, depending on the language, CoP +is technically being used, as functional languages are already generally +antagonistic toward globals and impure functions, which is most of the battle. +If anything, the transition from functional to component-oriented programming +will generally be an organizational task. diff --git a/static/src/_posts/2021-01-01-new-year-new-resolution.md b/static/src/_posts/2021-01-01-new-year-new-resolution.md new file mode 100644 index 0000000..8e9edc7 --- /dev/null +++ b/static/src/_posts/2021-01-01-new-year-new-resolution.md @@ -0,0 +1,50 @@ +--- +title: >- + New Year, New Resolution +description: >- + This blog is about to get some action.
+--- + +At this point I'm fairly well known amongst friends and family for my new year's +resolutions, to the point that earlier this month a friend of mine asked me +"What's it going to be this year?". In the past I've done things like no +chocolate, no fast food, no added sugar (see a theme?), and no social media. +They've all been of the "I won't do this" sort, because it's a lot easier to +stop doing something than to start doing something new. Doing something new +inherently means _also_ not doing something else; there's only so many hours in +the day, after all. + +## This Year + +This year I'm going to shake things up, I'm going to do something new. My +resolution is to have published 52 posts on this blog by Jan 1, 2022, 00:00 UTC. +Only one post per day can count towards the 52. A post must be "substantial" to +count towards the 52. A non-substantial post would be something like the 100 +word essay about my weekend that I wrote in first grade, which went something +like "My weekend was really really really ('really' 96 more times) really really +boring". + +Other than that, it's pretty open-ended. + +## Why + +My hope is that I'll get more efficient at writing these things. Usually I take +a lot of time to craft a post, weeks in some cases. I really appreciate those of +you that have taken the time to read them, but to be frank the time commitment +just isn't worth it. With practice I can hopefully learn what exactly I have to +say that others are interested in, and then go back to spending a lot of time +crafting the things being said. + +Another part of this is going to be learning how to market myself properly, +something I've always been reticent to do. Our world is filled with people +shouting into the void of the internet, each with their own reasons for wanting +to be heard. Does it need another? Probably not. But here I am.
I guess what I'm +really going to be doing is learning _why_ I want to do this; I know I want to +have others read what I write, but is it possible that that desire isn't +entirely selfish? Is it ok if it is? + +Once I'm comfortable with why I'm doing this it will, hopefully, be easier to +figure out a marketing avenue I feel comfortable with putting a lot of energy +towards. There must be at least _one_... + +So consider this #1, world. Only 51 to go. diff --git a/static/src/_posts/2021-01-09-ginger.md b/static/src/_posts/2021-01-09-ginger.md new file mode 100644 index 0000000..fde8868 --- /dev/null +++ b/static/src/_posts/2021-01-09-ginger.md @@ -0,0 +1,354 @@ +--- +title: >- + Ginger +description: >- + Yes, it does exist. +series: ginger +tags: tech +--- + +This post is about a programming language that's been bouncing around in my head +for a _long_ time. I've tried to actually implement the language three or more +times now, but every time I get stuck or run out of steam. It doesn't help that +every time I try again the form of the language changes significantly. But all +throughout the name of the language has always been "Ginger". It's a good name. + +In the last few years the form of the language has somewhat solidified in my +head, so in lieu of actually working on it I'm going to talk about what it +currently looks like. + +## Abstract Syntax Lists + +_In the beginning_ there was assembly. Well, really in the beginning there were +punchcards, and probably something even more esoteric before that, but it was +all effectively the same thing: a list of commands the computer would execute +sequentially, with the ability to jump to odd places in the sequence depending +on conditions at runtime. For the purpose of this post, we'll call this class of +languages "abstract syntax list" (ASL) languages.
+ +Here's a hello world program in my favorite ASL language, brainfuck: + +``` +++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.++ ++.------.--------.>>+.>++. +``` + +(If you've never seen brainfuck, it's deliberately unintelligible. But it _is_ +an ASL, each character representing a single command, executed by the brainfuck +runtime from left to right.) + +ASLs did the job at the time, but luckily we've mostly moved on past them. + +## Abstract Syntax Trees + +Eventually programmers upgraded to C-like languages. Rather than a sequence of +commands, these languages were syntactically represented by an "abstract syntax +tree" (AST). Rather than executing commands in essentially the same order they +are written, an AST language compiler reads the syntax into a tree of syntax +nodes. What it then does with the tree is language dependent. + +Here's a program which outputs all numbers from 0 to 9 to stdout, written in +(slightly non-idiomatic) Go: + +```go +i := 0 +for { + if i == 10 { + break + } + fmt.Println(i) + i++ +} +``` + +When the Go compiler sees this, it's going to first parse the syntax into an +AST. The AST might look something like this: + +``` +(root) + |-(:=) + | |-(i) + | |-(0) + | + |-(for) + |-(if) + | |-(==) + | | |-(i) + | | |-(10) + | | + | |-(break) + | + |-(fmt.Println) + | |-(i) + | + |-(++) + |-(i) +``` + +Each of the non-leaf nodes in the tree represents an operation, and the children +of the node represent the arguments to that operation, if any. From here the +compiler traverses the tree depth-first in order to turn each operation it finds +into the appropriate machine code. + +There's a sub-class of AST languages called the LISP ("LISt Processor") +languages. In a LISP language the AST is represented using lists of elements, +where the first element in each list denotes the operation and the rest of the +elements in the list (if any) represent the arguments. 
Traditionally each list +is represented using parentheses. For example `(+ 1 1)` represents adding 1 and +1 together. + +As a more complex example, here's how to print numbers 0 through 9 to stdout +using my favorite (and, honestly, only) LISP, Clojure: + +```clj +(doseq + [n (range 10)] + (println n)) +``` + +Much smaller, but the idea is there. In LISPs there is no differentiation +between the syntax, the AST, and the language's data structures; they are all +one and the same. For this reason LISPs generally have very powerful macro +support, wherein one uses code written in the language to transform code written +in that same language. With macros users can extend a language's functionality +to support nearly anything they need to, but because macro generation happens +_before_ compilation they can still reap the benefits of compiler optimizations. + +### AST Pitfalls + +The ASL (assembly) is essentially just a thin layer of human readability on top +of raw CPU instructions. It does nothing in the way of representing code in the +way that humans actually think about it (relationships of types, flow of data, +encapsulation of behavior). The AST is a step towards expressing code in human +terms, but it isn't quite there in my opinion. Let me show why by revisiting the +Go example above: + +```go +i := 0 +for { + if i > 9 { + break + } + fmt.Println(i) + i++ +} +``` + +When I understand this code I don't understand it in terms of its syntax. I +understand it in terms of what it _does_. And what it does is this: + +* with a number starting at 0, start a loop. +* if the number is greater than 9, stop the loop. +* otherwise, print the number. +* add one to the number. +* go to start of loop. + +This behavior could be further abstracted into the original problem statement, +"it prints numbers 0 through 9 to stdout", but that's too general, as there +are different ways for that to be accomplished.
The Clojure example first +defines a list of numbers 0 through 9 and then iterates over that, rather than +looping over a single number. These differences are important when understanding +what code is doing. + +So what's the problem? My problem with ASTs is that the syntax I've written down +does _not_ reflect the structure of the code or the flow of data which is in my +head. In the AST representation if you want to follow the flow of data (a single +number) you _have_ to understand the semantic meaning of `i` and `:=`; the AST +structure itself does not convey how data is being moved or modified. +Essentially, there's an extra implicit transformation that must be done to +understand the code in human terms. + +## Ginger: An Abstract Syntax Graph Language + +In my view the next step is towards using graphs rather than trees for +representing our code. A graph has the benefit of being able to reference +"backwards" into itself, where a tree cannot, and so can represent the flow of +data much more directly. + +I would like Ginger to be an ASG language where the language is the graph, +similar to a LISP. But what does this look like exactly? Well, I have a good +idea about what the graph _structure_ will be like and how it will function, but +the syntax is something I haven't bothered much with yet. Representing graph +structures in a text file is a problem to be tackled all on its own. For this +post we'll use a made-up, overly verbose, and probably non-usable syntax, but +hopefully it will convey the graph structure well enough. + +### Nodes, Edges, and Tuples + +All graphs have nodes, where each node contains a value. A single unique value +can only have a single node in a graph. Nodes are connected by edges, where +edges have a direction and can contain a value themselves. + +In the context of Ginger, a node represents a value as expected, and the value +on an edge represents an operation to take on that value. 
For example: + +``` +5 -incr-> n +``` + +`5` and `n` are both nodes in the graph, with an edge going from `5` to `n` that +has the value `incr`. When it comes time to interpret the graph we say that the +value of `n` can be calculated by giving `5` as the input to the operation +`incr` (increment). In other words, the value of `n` is `6`. + +What about operations which have more than one input value? For this Ginger +introduces the tuple to its graph type. A tuple is like a node, except that it's +anonymous, which allows more than one to exist within the same graph, as they do +not share the same value. For the purposes of this blog post we'll represent +tuples like this: + +``` +1 -> } -add-> t +2 -> } +``` + +`t`'s value is the result of passing a tuple of two values, `1` and `2`, as +inputs to the operation `add`. In other words, the value of `t` is `3`. + +For the syntax being described in this post we allow that a single contiguous +graph can be represented as multiple related sections. This can be done because +each node's value is unique, so when the same value is used in disparate +sections we can merge the two sections on that value. For example, the following +two graphs are exactly equivalent (note the parentheses wrapping the graph which +has been split): + +``` +1 -> } -add-> t -incr-> tt +2 -> } +``` + +``` +( + 1 -> } -add-> t + 2 -> } + + t -incr-> tt +) +``` + +(`tt` is `4` in both cases.) + +A tuple with only one input edge, a 1-tuple, is a no-op, semantically, but can +be useful structurally to chain multiple operations together without defining +new value names. In the above example the `t` value can be eliminated using a +1-tuple. + +``` +1 -> } -add-> } -incr-> tt +2 -> } +``` + +When an integer is used as an operation on a tuple value then the effect is to +output the value in the tuple at that index. For example: + +``` +1 -> } -0-> } -incr-> t +2 -> } +``` + +(`t` is `2`.)
+ +### Operations + +When a value sits on an edge it is used as an operation on the input of that +edge. Some operations will no doubt be builtin, like `add`, but users should be +able to define their own operations. This can be done using the `in` and `out` +special values. When a graph is used as an operation it is scanned for both `in` +and `out` values. `in` is set to the input value of the operation, and the value +of `out` is used as the output of the operation. + +Here we will define the `incr` operation and then use it. Note that we set the +`incr` value to be an entire sub-graph which represents the operation's body. + +``` +( in -> } -add-> out + 1 -> } ) -> incr + +5 -incr-> n +``` + +(`n` is `6`.) + +The output of an operation may itself be a tuple. Here's an implementation and +usage of `double-incr`, which increments two values at once. + +``` +( in -0-> } -incr-> } -> out + } + in -1-> } -incr-> } ) -> double-incr + +1 -> } -double-incr-> t -add-> tt +2 -> } +``` + +(`t` is a 2-tuple with values `2` and `3`; `tt` is `5`.) + +### Conditionals + +The conditional is a bit weird, and I'm not totally settled on it yet. For now +we'll use this. The `if` operation expects as an input a 2-tuple whose first +value is a boolean and whose second value will be passed along. The `if` +operation is special in that it has _two_ output edges. The first will be taken +if the boolean is true, the second if the boolean is false. The second value in +the input tuple, the one to be passed along, is used as the input to whichever +branch is taken. + +Here is an implementation and usage of `max`, which takes two numbers and +outputs the greater of the two. Note that the `if` operation has two output +edges, but our syntax doesn't represent that very cleanly. + +``` +( in -gt-> } -if-> } -0-> out + in -> } -> } -1-> out ) -> max + +1 -> } -max-> t +2 -> } +``` + +(`t` is `2`.)
+ +It would be simple enough to create a `switch` macro on top of `if`, to allow +for multiple conditionals to be tested at once. + +### Loops + +Loops are tricky, and I have two thoughts about how they might be accomplished. +One is to literally draw an edge from the right end of the graph back to the +left, at the point where the loop should occur, as that's conceptually what's +happening. But representing that in a text file is difficult. For now I'll +introduce the special `recur` value, and leave this whole section as TBD. + +`recur` is a cousin of `in` and `out`, in that it's a special value and not an +operation. It takes whatever value it's set to and calls the current operation +with that as input. As an example, here is our now classic 0 through 9 printer +(assume `println` outputs whatever it was input): + +``` +// incr-1 is an operation which takes a 2-tuple and returns the same 2-tuple +// with the first element incremented. +( in -0-> } -incr-> } -> out + in -1-> } ) -> incr-1 + +( in -eq-> } -if-> out + in -> } -> } -0-> } -println-> } -incr-1-> } -> recur ) -> print-range + +0 -> } -print-range-> } +10 -> } +``` + +## Next Steps + +This post is long enough, and I think gives at least a basic idea of what I'm +going for. The syntax presented here is _extremely_ rudimentary, and is almost +definitely not what any final version of the syntax would look like. But the +general idea behind the structure is sound, I think. + +I have a lot of further ideas for Ginger I haven't presented here. Hopefully as +time goes on and I work on the language more some of those ideas can start +taking a more concrete shape and I can write about them. + +The next thing I need to do for Ginger is to implement (again) the graph type +for it, since the last one I implemented didn't include tuples. Maybe I can +extend it instead of re-writing it. After that it will be time to really buckle +down and figure out a syntax.
Once a syntax is established then it's time to +start on the compiler! diff --git a/static/src/_posts/2021-01-14-the-web.md b/static/src/_posts/2021-01-14-the-web.md new file mode 100644 index 0000000..cae564a --- /dev/null +++ b/static/src/_posts/2021-01-14-the-web.md @@ -0,0 +1,241 @@ +--- +title: >- + The Web +description: >- + What is it good for? +series: nebula +tags: tech +--- + +With the recent crisis in the US's democratic process, there's been much abuzz +in the world about social media's undoubted role in the whole debacle. The +extent to which the algorithms of Facebook, Twitter, Youtube, TikTok, etc, have +played a role in the radicalization of large segments of the world's population +is one popular topic. Another is the tactics those same companies are now +employing to try and euthanize the monster they made so much ad money in +creating. + +I don't want to talk about any of that; there is more to the web than +social media. I want to talk about what the web could be, and to do that I want +to first talk about what it has been. + +## Web 1.0 + +In the 1950's computers were generally owned by large organizations like +companies, universities, and governments. They were used to compute and manage +large amounts of data, and each existed independently of the other. + +In the 60's protocols began to be developed which would allow them to +communicate over large distances, and thereby share resources (both +computational and informational). + +The funding of ARPANET by the US DoD led to the initial versions of the TCP/IP +protocol in the 70's, still used today as the backbone of virtually all internet +communication. Email also came about from ARPANET around this time. + +The 80s saw the growth of the internet across the world, as ARPANET gave way to +NSFNET. It was during this time that the domain name system we use today was +developed. 
At this point the internet use was still mostly for large +non-commercial organizations; there was little commercial footprint, and little +private access. The first commercially available ISP, which allowed access to +the internet from private homes via dialup, wasn't launched until 1989. + +And so we find ourselves in the year 1989, when Tim Berners-Lee (TBL) first +proposed the World-Wide Web (WWW, or "the web"). You can find the original +proposal, which is surprisingly short and non-technical, +[here](https://www.w3.org/Proposal.html). + +From reading TBL's proposal it's clear that what he was after was some mechanism +for hosting information on his machine in such a way that others could find and +view it, without it needing to be explicitly sent to them. He includes the +following under the "Applications" header: + +> The application of a universal hypertext system, once in place, will cover +> many areas such as document registration, on-line help, project documentation, +> news schemes and so on. + +But out of such a humble scope grew one of the most powerful forces of the 21st +century. By the end of 1990 TBL had written the first HTML/HTTP browser and +server. By the end of 1994 sites like IMDB, Yahoo, and Bianca's Smut Shack were +live and being accessed by consumers. The web grew that fast. + +In my view the characteristic of the web which catalyzed its adoption so quickly +was the place-ness of it. The web is not just a protocol for transferring +information, like email, but instead is a _place_ where that information lives. +Any one place could be freely linked to any other place, and so complex and +interesting relations could be formed between people and ideas. 
The +contributions people make on the web can reverberate farther than they would or +could in any other medium precisely because those contributions aren't tied to +some one-off event or a deteriorating piece of physical infrastructure, but are +instead given a home which is both permanent and everywhere. + +The other advantage of the web, at the time, was its simplicity. HTML was so +simple it was basically human-readable. A basic HTTP server could be implemented +as a hobby project by anyone in any language. Hosting your own website was a +relatively straightforward task which anyone with a computer and an ISP could +undertake. + +This was the environment early adopters of the web found themselves in. + +## Web 2.0 + +The infamous dot-com bust took place in 2001. I don't believe this was a failure +inherent in the principles of the web itself, but instead was a product of +people investing in a technology they didn't fully understand. The web, as it +was then, wasn't really designed with money-making in mind. It certainly allowed +for it, but that wasn't the use-case being addressed. + +But of course, in this world we live in, if there's money to be made, it will +certainly be made. + +By 2003 the phrase "Web 2.0" started popping up. I remember this. To me "Web +2.0" meant a new aesthetic on the web, complete with bubble buttons and centered +fix-width paragraph boxes. But what "Web 2.0" actually signified wasn't related +to any new technology or aesthetic. It was a new strategy for how companies +could enable use of the web by non-expert users, i.e. users who don't have the +inclination or means to host their own website. Web 2.0 was a strategy for +giving everyone a _place_ of their own on the web. + +"Web 2.0" was merely a label given to a movement which had already been in +motion for years.
I think the following Wikipedia excerpt describes this period +best: + + +> In 2004, the term ["Web 2.0"] began its rise in popularity when O'Reilly Media +and MediaLive hosted the first Web 2.0 conference. In their opening remarks, +John Battelle and Tim O'Reilly outlined their definition of the "Web as +Platform", where software applications are built upon the Web as opposed to upon +the desktop. The unique aspect of this migration, they argued, is that +"customers are building your business for you". They argued that the +activities of users generating content (in the form of ideas, text, videos, or +pictures) could be "harnessed" to create value. + + +In other words, Web 2.0 turned the place-ness of the web into a commodity. +Rather than expect everyone to host, or arrange for the hosting, of their own +corner of the web, the technologists would do it for them for "free"! This +coincided with the increasing complexity of the underlying technology of the +web; websites grew to be flashy, interactive, and stateful applications which +_did_ things rather than be places which _held_ things. The idea of a hyperlink, +upon which the success of the web had been founded, became merely an +implementation detail. + +And so the walled gardens began to be built. Myspace was founded in 2003, +Facebook opened to the public in 2006, Digg (the precursor to reddit) was +launched in 2004, Flickr launched in 2004 (and was bought by Yahoo in 2005), +Google bought Blogger in 2003, and Twitter launched in 2006. In effect this +period both opened the web up to everyone and established the way we still use +it today. + +It's upon these foundations that current events unfold. We have platforms whose +only incentive is towards capturing new users and holding their attention, to +the exclusion of other platforms, so they can be advertised to. 
Users are +enticed in because they are being offered a place on the web, a place of their +own to express themselves from, in order to find out the worth of their +expressions to the rest of the world. But they aren't expressing to the world at +large, they are expressing to a social media platform, a business, and so only +the most lucrative of voices are heard. + +So much for not wanting to talk about social media. + +## Web 3.0 + +The new hot topic in crypto and hacker circles is "Web 3.0", or the +decentralized web (dweb). The idea is that we can have all the good of the +current web (the accessibility, utility, permanency, etc) without all the bad +(the centralized platforms, censorship, advertising, etc). The way forward to +this utopian dream is by building decentralized applications (dApps). + +dApps are constructed in a way where all the users of an application help to +host all the stateful content of that application. If I, as a user, post an +image to a dApp, the idea is that other users of that same dApp would lend their +meager computer resources to ensure my image is never forgotten, and in turn I +would lend mine for theirs. + +In practice building successful dApps is enormously difficult for many reasons, +and really I'm not sure there _are_ any successful ones (to date). While I +support the general sentiment behind them, I sometimes wonder about the +efficacy. What people want from the web is a place they can call their own, a +place from which they can express themselves and share their contributions with +others with all the speed and pervasiveness that the internet offers. A dApp is +just another walled garden with specific capabilities; it offers only free +hosting, not free expression. + +## Web 2.0b + +I'm not here solely to complain (just mostly). + +Thinking back to Web 1.0, and specifically to the turning point between 1.0 and +2.0, I'd like to propose that maybe we made a wrong turn. 
The issue at hand was +that hosting one's own site was still too much of a technical burden, and the +direction we went was towards having businesses host them for us. Perhaps there +was another way. + +What are the specific difficulties with hosting one's own site? Here are the +ones I can think of: + +* Bad tooling: basically none of the tools you're required to use (web server, + TLS, DNS, your home router) are designed for the average person. + +* Egregiously complex languages: making a site which looks half decent and can + do the things you want requires a _lot_ of knowledge about the underlying + languages (CSS, HTML, Javascript, and whatever your server is written in). + +* Single point-of-failure: if your machine is off, your site is down. + +* Security: it's important to stay ahead of the hackers, but it takes time to + do so. + +* Hostile environment: this is separate from security, and includes difficulties + like dynamic home IPs and bad ISP policies (such as asymmetric upload/download + speeds). + +These are each separate avenues of attack. + +Bad tooling is a result of the fact that devs generally build technology for +themselves or their fellow devs, and only build for others when they're being +paid to do it. This is merely an attitude problem. + +Complex languages are really a sub-category of bad tooling. The consensus seems +to be that the average person isn't interested or capable of working in +HTML/CSS/JS. This may be true today, but it wasn't always. Most of my friends in +middle and high school were well within their interest and capability to create +the most heinous MySpace pages the world has ever seen, using nothing but CSS +generators and scraps of shitty JS they found lying around. So what changed? The +tools we use to build those pages did.
+ +A hostile environment is not something any individual can do anything about, but +in the capitalist system we exist in we can at least hold in faith the idea that +eventually us customers will get what we want. It may take a long time, but all +monopolies break eventually, and someone will eventually sell us the internet +access we're asking for. If all other pieces are in place I think we'll have +enough people asking to make a difference. + +For single point-of-failure we have to grant that more than one person will be +involved, since the vast majority of people aren't going to be able to keep one +machine online consistently, let alone two or more machines. But I think we all +know at least one person who could keep a machine online with some reliability, +and they probably know a couple of other people who could do so as well. What +I'm proposing is that, rather than building tools for global decentralization, +we need tools for local decentralization, aka federation. We can make it +possible for a group of people to have their presence managed by a subset of +themselves. Those with the ability could help to host the online presence of +their family, friends, churches, etc, if given the right tools. + +Security is the hard one, but also in many ways isn't. What most people want +from the web is a place from which to express themselves. Expression doesn't +take much more than a static page, usually, and there's not much attacking one +can do against a static page. Additionally, we've already established that +there's going to be at least a _couple_ of technically minded people involved in +hosting this thing. + +So that's my idea that I'd like to build towards. First among these ideas is +that we need tools which can help people help each other host their content, and +on top of that foundation a new web can be built which values honest expression +rather than the lucrative madness which our current algorithms love so much. 
+ +This project was already somewhat started by +[Cryptorado](https://github.com/Cryptorado-Community/Cryptorado-Node) while I +was a regular attendee, but since COVID started my attendance has fallen off. +Hopefully one day it can resume. In the meantime I'm going to be working on +setting up these tools for myself, and see how far I can get. diff --git a/static/src/_posts/2021-01-23-goodbye-github-pages.md b/static/src/_posts/2021-01-23-goodbye-github-pages.md new file mode 100644 index 0000000..e85ca81 --- /dev/null +++ b/static/src/_posts/2021-01-23-goodbye-github-pages.md @@ -0,0 +1,247 @@ +--- +title: >- + Goodbye, Github Pages +description: >- + This blog is no longer sponsored by Microsoft! +tags: tech +series: selfhost +--- + +Slowly but surely I'm working on moving my digital life back to being +self-hosted, and this blog was an easy low-hanging fruit to tackle. Previously +the blog was hosted on Github Pages, which was easy enough but also in many ways +restricting. By self-hosting I'm able to have a lot more control over the +generation, delivery, and functionality of the blog. + +For reference you can find the source code for the blog at +[{{site.repository}}]({{site.repository}}). Yes, it will one day be hosted +elsewhere as well. + +## Nix + +Nix is something I'm slowly picking up, but the more I use it the more it grows +on me. Rather than littering my system with ruby versions and packages I'll +never otherwise use, nix allows me to create a sandboxed build pipeline for the +blog with perfectly reproducible results. + +The first step in this process is to take the blog's existing `Gemfile.lock` and +turn it into a `gemset.nix` file, which is essentially a translation of the +`Gemfile.lock` into a file nix can understand. 
There's a tool called +[bundix][bundix] which does this, and it can be used from a nix shell without +having to actually install anything: + +``` + nix-shell -p bundix --run 'bundix' +``` + +The second step of using nix is to set up a nix expression in the file +`default.nix`. This will actually build the static files. As a bonus I made my +expression also allow for serving the site locally with dynamic updating +every time I change a source file. My `default.nix` looks like this: + +``` +{ + # pkgs refers to all "builtin" nix pkgs and utilities. By importing from a + # URL I'm able to always pin this default.nix to a specific version of those + # packages. + pkgs ? import (fetchTarball "https://github.com/NixOS/nixpkgs/archive/cd63096d6d887d689543a0b97743d28995bc9bc3.tar.gz") {}, + system ? builtins.currentSystem, +}: + + let + # bundlerEnv looks for a Gemfile, Gemfile.lock, and gemset.nix inside + # gemdir, and derives a package containing ruby and all desired gems. + ruby_env = pkgs.bundlerEnv { + name = "ruby_env"; + ruby = pkgs.ruby; + gemdir = ./.; + }; + in + { + # build will derive a package which contains the generated static + # files of the blog. It uses the build.sh file (provided below) to + # do this. + build = derivation { + name = "mediocre-blog"; + + # The build.sh file (source provided below) is executed in order + # to actually build the site. + builder = "${pkgs.bash}/bin/bash"; + args = [ ./build.sh ]; + + # ruby_env is provided as an input to build.sh so that it can + # use jekyll, and the src directory is provided so it can access + # the blog's source files. system is required by the derivation + # function, and stdenv provides standard utilities to build.sh. + inherit ruby_env system; + src = ./src; + stdenv = pkgs.stdenv; + }; + + # serve will derive an environment specifically tailored for being + # run in a nix-shell.
The resulting shell will have ruby_env + # provided for it, and will automatically run the `jekyll serve` + # command to serve the blog locally. + serve = pkgs.stdenv.mkDerivation { + name = "mediocre-blog-shell"; + + # glibcLocales is required so to fill in LC_ALL and other locale + # related environment vars. Without those jekyll's scss compiler + # fails. + # + # TODO probably get rid of the scss compiler. + buildInputs = [ ruby_env pkgs.glibcLocales ]; + + shellHook = '' + exec ${ruby_env}/bin/jekyll serve -s ./src -d ./_site -w -I -D + ''; + }; + } +``` + +(Nix is a bit tricky to learn, but I highly recommend chapters 14 and 15 of [the +nix manual][manual] for an overview of the language itself, if nothing else.) + +The `build.sh` used by the nix expression to actually generate the static files +looks like this: + +```bash +# stdenv was given a dependency to build.sh, and so build.sh can use it to +# source in utilities like mkdir, which it needs. +source $stdenv/setup +set -e + +# Set up the output directory. nix provides the $out variable which will be the +# root of the derived package's filesystem, but for simplicity later we want to +# output the site within /var/www. +d="$out/var/www/blog.mediocregopher.com" +mkdir -p "$d" + +# Perform the jekyll build command. Like stdenv the ruby_env was given as a +# dependency to build.sh, so it has to explicitly use it to have access to +# jekyll. src is another explicit dependency which was given to build.sh, and +# contains all the actual source files within the src directory of the repo. +$ruby_env/bin/jekyll build -s "$src" -d "$d" +``` + +With these pieces in place I can easily regenerate the site like so: + +``` +nix-build -A build +``` + +Once run the static files will exist within a symlink called `result` in the +project's root. 
Within the symlink will be a `var/www/blog.mediocregopher.com` +tree of directories, and within that will be the generated static files, all +without ever having to have installed ruby. + +The expression also allows me to serve the blog while I'm working on it. Doing +so looks like this: + +``` +nix-shell -A serve +``` + +When run I get a normal jekyll process running in my `src` directory, serving +the site in real-time on port 4000, once again all without ever installing ruby. + +As a final touch I introduced a simple `Makefile` to my repo to wrap these +commands, because even these were too much for me to remember: + +``` +result: + nix-build -A build + +install: result + nix-env -i "$$(readlink result)" + +clean: + rm result + rm -rf _site + +serve: + nix-shell -A serve + +update: + nix-shell -p bundler --run 'bundler update; bundler lock; bundix; rm -rf .bundle vendor' +``` + +We'll look at that `install` target in the next section. + +## nginx + +So now I have the means to build my site quickly, reliably, and without +cluttering up the rest of my system. Time to actually serve the files. + +My home server has a docker network which houses most of my services that I run, +including nginx. nginx's primary job is to listen on ports 80 and 443, accept +HTTP requests, and direct those requests to their appropriate service based on +their `Host` header. nginx is also great at serving static content from disk, so +I'll take advantage of that for the blog. + +The one hitch is that nginx is currently running within a docker container, +as are all my other services. Ideally I would: + +* Get rid of the nginx docker container. +* Build a nix package containing nginx, all my nginx config files, and the blog + files themselves. +* Run that directly. + +Unfortunately extracting nginx from docker is dependent on doing so for all +other services as well, or at least on running all services on the host network, +which I'm not prepared to do yet. 
So for now I've done something janky. + +If you look at the `Makefile` above you'll notice the `install` target. What +that target does is to install the static blog files to my nix profile, which +exists at `$HOME/.nix-profile`. nix allows any package to be installed to a +profile in this way. All packages within a profile are independent and can be +added, updated, and removed atomically. By installing the built blog package to +my profile I make it available at +`$HOME/.nix-profile/var/www/blog.mediocregopher.com`. + +So to serve those files via nginx all I need to do is add a read-only volume to +the container... + +``` +-v $HOME/.nix-profile/var/www/blog.mediocregopher.com:/var/www/blog.mediocregopher.com:ro \ +``` + +...add a new virtual host to my nginx config... + +``` +server { + listen 80; + server_name blog.mediocregopher.com; + root /var/www/blog.mediocregopher.com; +} +``` + +...and finally direct the `blog` A record for `mediocregopher.com` to my home +server's IP. Cloudflare will handle TLS on port 443 for me in this case, as well +as hide my home IP, which is prudent. + +## Deploying + +So now it's time to publish this new post to the blog, what are the actual +steps? It's as easy as: + +``` +make clean install +``` + +This will remove any existing `result`, regenerate the site (with the new post) +under a new symlink, and install/update that newer package to my nix profile, +overwriting the previous package which was there. + +EDIT: apparently this isn't quite true. Because `$HOME/.nix-profile` is a +symlink docker doesn't handle the case of that symlink being updated correctly, +so I also have to do `docker restart nginx` for changes to be reflected in +nginx. + +And that's it! Nix is a cool tool that I'm still getting the hang of, but +hopefully this post might be useful to anyone else thinking of self-hosting +their site. 
+ +[jekyll]: https://jekyllrb.com/ +[bundix]: https://github.com/nix-community/bundix +[manual]: https://nixos.org/manual/nix/stable/#chap-writing-nix-expressions diff --git a/static/src/_posts/2021-01-30-building-mobile-nebula.md b/static/src/_posts/2021-01-30-building-mobile-nebula.md new file mode 100644 index 0000000..0645e70 --- /dev/null +++ b/static/src/_posts/2021-01-30-building-mobile-nebula.md @@ -0,0 +1,390 @@ +--- +title: >- + Building Mobile Nebula +description: >- + Getting my hands dirty with Android development. +series: nebula +tags: tech +--- + +This post is going to be cheating a bit. I want to start working on adding DNS +resolver configuration to the [mobile nebula][mobile_nebula] app (if you don't +know nebula, [check it out][nebula], it's well worth knowing about), but I also +need to write a blog post for this week, so I'm combining the two exercises. +This post will essentially be my notes from my progress on today's task. + +(Protip: listen to [this][heilung] while following along to achieve the proper +open-source programming aesthetic.) + +The current mobile nebula app works very well, but it is lacking one major +feature: the ability to specify custom DNS resolvers. This is important because +I want to be able to access resources on my nebula network by their hostname, +not their IP. Android does everything in its power to make DNS configuration +impossible, and essentially the only way to actually accomplish this is by +specifying the DNS resolvers within the app. I go into more details about why +Android is broken [here][dns_issue]. + +## Setup + +Before I can make changes to the app I need to make sure I can correctly build +it in the first place, so that's the major task for today. The first step to +doing so is to install the project's dependencies. 
As described in the +[mobile_nebula][mobile_nebula] README, the dependencies are: + +- [`flutter`](https://flutter.dev/docs/get-started/install) +- [`gomobile`](https://godoc.org/golang.org/x/mobile/cmd/gomobile) +- [`android-studio`](https://developer.android.com/studio) +- [Enable NDK](https://developer.android.com/studio/projects/install-ndk) + +It should be noted that as of writing I haven't used any of these tools ever, +and have only done a small amount of android programming, probably 7 or 8 years +ago, so I'm going to have to walk the line between figuring out problems on the +fly and not having to completely learn these entire ecosystems; there's only +so many hours in a weekend, after all. + +I'm running [Archlinux][arch] so I install android-studio and flutter by +doing: + +```bash +yay -Sy android-studio flutter +``` + +And I install `gomobile`, according to its [documentation][gomobile] via: + +```bash +go get golang.org/x/mobile/cmd/gomobile +gomobile init +``` + +Now I start up android-studio and go through the setup wizard for it. I choose +standard setup because customized setup doesn't actually offer any interesting +options. Next android-studio spends approximately two lifetimes downloading +dependencies while my eyesight goes blurry because I'm drinking my coffee too +fast. + +It's annoying that I need to install these dependencies, especially +android-studio, in order to build this project. A future goal of mine is to nix +this whole thing up, and make a build pipeline where you can provide a full +nebula configuration file and it outputs a custom APK file for that specific +config; zero configuration required at runtime. This will be useful for +lazy/non-technical users who want to be part of the nebula network. + +Once android-studio starts up I'm not quite done yet: there's still the NDK +which must be enabled.
The instructions given by the link in +[mobile_nebula][mobile_nebula]'s README explain doing this pretty well, but it's +important to install the specific version indicated in the mobile_nebula repo +(`21.0.6113669` at time of writing). Only another 1GB of dependency downloading +to go.... + +While waiting for the NDK to download I run `flutter doctor` to make sure +flutter is working, and it gives me some permissions errors. [This blog +post][flutter_blog] gives some tips on setting up, and after running the +following... + +```bash +sudo groupadd flutterusers +sudo gpasswd -a $USER flutterusers +sudo chown -R :flutterusers /opt/flutter +sudo chmod -R g+w /opt/flutter/ +newgrp flutterusers +``` + +... I'm able to run `flutter doctor`. It gives the following output: + +``` +[✓] Flutter (Channel stable, 1.22.6, on Linux, locale en_US.UTF-8) + +[!] Android toolchain - develop for Android devices (Android SDK version 30.0.3) + ✗ Android licenses not accepted. To resolve this, run: flutter doctor --android-licenses +[!] Android Studio + ✗ Flutter plugin not installed; this adds Flutter specific functionality. + ✗ Dart plugin not installed; this adds Dart specific functionality. +[!] Connected device + ! No devices available + +! Doctor found issues in 3 categories. +``` + +The first issue is easily solved as per the instructions given. The second is +solved by finding the plugin manager in android-studio and installing the +flutter plugin (which installs the dart plugin as a dependency, we call that a +twofer). + +After installing the plugin the doctor command still complains about not finding +the plugins, but the above mentioned blog post indicates to me that this is +expected. It's comforting to know that the problems indicated by the doctor may +or may not be real problems. 
+ +The [blog post][flutter_blog] also indicates that I need `openjdk-8` installed, +so I do: + +```bash +yay -S jdk8-openjdk +``` + +And use the `archlinux-java` command to confirm that that is indeed the default +version for my shell. The [mobile_nebula][mobile_nebula] helpfully expects an +`env.sh` file to exist in the root, so if openjdk-8 wasn't already the default I +could make it so within that file. + +## Build + +At this point I think I'm ready to try actually building an APK. Thoughts and +prayers required. I run the following in a terminal, since for some reason the +`Build > Flutter > Build APK` dropdown button in android-studio did nothing. + +``` +flutter build apk +``` + +It takes quite a while to run, but in the end it errors with: + +``` +make: 'mobileNebula.aar' is up to date. +cp: cannot create regular file '../android/app/src/main/libs/mobileNebula.aar': No such file or directory + +FAILURE: Build failed with an exception. + +* Where: +Build file '/tmp/src/mobile_nebula/android/app/build.gradle' line: 95 + +* What went wrong: +A problem occurred evaluating project ':app'. +> Process 'command './gen-artifacts.sh'' finished with non-zero exit value 1 + +* Try: +Run with --stacktrace option to get the stack trace. Run with --info or --debug option to get more log output. Run with --scan to get full insights. + +* Get more help at https://help.gradle.org + +BUILD FAILED in 1s +Running Gradle task 'bundleRelease'... +Running Gradle task 'bundleRelease'... Done 1.7s +Gradle task bundleRelease failed with exit code 1 +``` + +I narrow down the problem to the `./gen-artifacts.sh` script in the repo's root, +which takes in either `android` or `ios` as an argument. Running it directly +as `./gen-artifacts.sh android` results in the same error: + +```bash +make: 'mobileNebula.aar' is up to date. 
+cp: cannot create regular file '../android/app/src/main/libs/mobileNebula.aar': No such file or directory +``` + +So now I gotta figure out wtf that `mobileNebula.aar` file is. The first thing I +note is that not only is that file not there, but the `libs` directory it's +supposed to be present in is also not there. So I suspect that there's a missing +build step somewhere. + +I search for the string `mobileNebula.aar` within the project using +[ag][silver_searcher] and find that it's built by `nebula/Makefile` as follows: + +```make +mobileNebula.aar: *.go + gomobile bind -trimpath -v --target=android +``` + +So that file is made by `gomobile`, good to know! Additionally the file is +actually there in the `nebula` directory, so I suspect there's just a missing +build step to move it into `android/app/src/main/libs`. Via some more `ag`-ing I +find that the code which is supposed to move the `mobileNebula.aar` file is in +the `gen-artifacts.sh` script, but that script doesn't create the `libs` folder +as it ought to. I apply the following diff: + +```bash +diff --git a/gen-artifacts.sh b/gen-artifacts.sh +index 601ed7b..4f73b4c 100755 +--- a/gen-artifacts.sh ++++ b/gen-artifacts.sh +@@ -16,7 +16,7 @@ if [ "$1" = "ios" ]; then + elif [ "$1" = "android" ]; then + # Build nebula for android + make mobileNebula.aar +- rm -rf ../android/app/src/main/libs/mobileNebula.aar ++ mkdir -p ../android/app/src/main/libs + cp mobileNebula.aar ../android/app/src/main/libs/mobileNebula.aar + + else +``` + +(The `rm -rf` isn't necessary, since a) that file is about to be overwritten by +the subsequent `cp` whether or not it's there, and b) it's just deleting a +single file so the `-rf` is an unnecessary risk). + +At this point I re-run `flutter build apk` and receive a new error. Progress! + +``` +A problem occurred evaluating root project 'android'. +> A problem occurred configuring project ':app'. + > Removing unused resources requires unused code shrinking to be turned on. 
See http://d.android.com/r/tools/shrink-resources.html for more information. +``` + +I recall that in the original [mobile_nebula][mobile_nebula] README it mentions +to run the `flutter build` command with the `--no-shrink` option, so I try: + +```bash +flutter build apk --no-shrink +``` + +Finally we really get somewhere. The command takes a very long time to run as it +downloads yet more dependencies (mostly android SDK stuff from the looks of it), +but unfortunately still errors out: + +``` +Execution failed for task ':app:processReleaseResources'. +> Could not resolve all files for configuration ':app:releaseRuntimeClasspath'. + > Failed to transform mobileNebula-.aar (:mobileNebula:) to match attributes {artifactType=android-compiled-dependencies-resources, org.gradle.status=integration}. + > Execution failed for AarResourcesCompilerTransform: /home/mediocregopher/.gradle/caches/transforms-2/files-2.1/735fc805916d942f5311063c106e7363/jetified-mobileNebula. + > /home/mediocregopher/.gradle/caches/transforms-2/files-2.1/735fc805916d942f5311063c106e7363/jetified-mobileNebula/AndroidManifest.xml +``` + +Time for more `ag`-ing. I find the file `android/app/build.gradle`, which has +the following block: + +``` + implementation (name:'mobileNebula', ext:'aar') { + exec { + workingDir '../../' + environment("ANDROID_NDK_HOME", android.ndkDirectory) + environment("ANDROID_HOME", android.sdkDirectory) + commandLine './gen-artifacts.sh', 'android' + } + } +``` + +I never set up the `ANDROID_HOME` or `ANDROID_NDK_HOME` environment variables, +and I suppose that if I'm running the flutter command outside of android-studio +there wouldn't be a way for flutter to know those values, so I try setting them +within my `env.sh`: + +```bash +export ANDROID_HOME=~/Android/Sdk +export ANDROID_NDK_HOME=~/Android/Sdk/ndk/21.0.6113669 +``` + +Re-running the build command still results in the same error. 
But it occurs to +me that I probably had built the `mobileNebula.aar` without those set +previously, so maybe it was built with the wrong NDK version or something. I +try deleting `nebula/mobileNebula.aar` and try building again. This time... +new errors! Lots of them! Big ones and small ones! + +At this point I'm a bit fed up, and want to try a completely fresh build. I back +up my modified `env.sh` and `gen-artifacts.sh` files, delete the `mobile_nebula` +repo, re-clone it, reinstall those files, and try building again. This time just +a single error: + +``` +Execution failed for task ':app:lintVitalRelease'. +> Could not resolve all artifacts for configuration ':app:debugRuntimeClasspath'. + > Failed to transform libs.jar to match attributes {artifactType=processed-jar, org.gradle.libraryelements=jar, org.gradle.usage=java-runtime}. + > Execution failed for JetifyTransform: /tmp/src/mobile_nebula/build/app/intermediates/flutter/debug/libs.jar. + > Failed to transform '/tmp/src/mobile_nebula/build/app/intermediates/flutter/debug/libs.jar' using Jetifier. Reason: FileNotFoundException, message: /tmp/src/mobile_nebula/build/app/intermediates/flutter/debug/libs.jar (No such file or directory). (Run with --stacktrace for more details.) + Please file a bug at http://issuetracker.google.com/issues/new?component=460323. +``` + +So that's cool, apparently there's a bug with flutter and I should file a +support ticket? Well, probably not. It seems that while +`build/app/intermediates/flutter/debug/libs.jar` indeed doesn't exist in the +repo, `build/app/intermediates/flutter/release/libs.jar` _does_, so this appears +to possibly be an issue in declaring which build environment is being used. + +After some googling I found [this flutter issue][flutter_issue] related to the +error. Tldr: gradle's not playing nicely with flutter. Downgrading could help, +but apparently building with the `--debug` flag also works.
I don't want to +build a release version anyway, so this sits fine with me. I run... + +```bash +flutter build apk --no-shrink --debug +``` + +And would you look at that, I got a result! + +``` +✓ Built build/app/outputs/flutter-apk/app-debug.apk. +``` + +## Install + +Building was probably the hard part, but I'm not totally out of the woods yet. +Theoretically I could email this apk to my phone or something, but I'd like +something with a faster turnover time; I need `adb`. + +I install `adb` via the `android-tools` package: + +```bash +yay -S android-tools +``` + +Before `adb` will work, however, I need to turn on USB debugging on my phone, +which I do by following [this article][usb_debugging]. Once connected I confirm +that `adb` can talk to my phone by doing: + +```bash +adb devices +``` + +And then, finally, I can install the apk: + +``` +adb install build/app/outputs/flutter-apk/app-debug.apk +``` + +NOT SO FAST! MORE ERRORS! + +``` +adb: failed to install build/app/outputs/flutter-apk/app-debug.apk: Failure [INSTALL_FAILED_UPDATE_INCOMPATIBLE: Package net.defined.mobile_nebula signatures do not match previously installed version; ignoring!] +``` + +I'm guessing this is because I already have the real nebula app installed. I +uninstall it and try again. + +AND IT WORKS!!! FUCK YEAH! + +``` +Performing Streamed Install +Success +``` + +I can open the nebula app on my phone and it works... fine. There's some +pre-existing networks already installed, which isn't the case for the Play Store +version as far as I can remember, so I suspect those are only there in the +debugging build. Unfortunately the presence of these test networks causes the +app to throw a bunch of errors because it can't contact those networks. Oh well. + +The presence of those test networks, in a way, is actually a good thing, as it +means there's probably already a starting point for what I want to do: building +a per-device nebula app with a config preloaded into it.
+ +## Further Steps + +Beyond continuing on towards my actual goal of adding DNS resolvers to this app, +there's a couple of other paths I could potentially go down at this point. + +* As mentioned, nixify the whole thing. I'm 99% sure the android-studio GUI + isn't actually needed at all, and I only used it for installing the CMake and + NDK plugins because I didn't bother to look up how to do it on the CLI. + +* Figuring out how to do a proper release build would be great, just for my own + education. Based on the [flutter issue][flutter_issue] it's possible that all + that's needed is to downgrade gradle, but maybe that's not so easy. + +* Get an android emulator working so that I don't have to install to my phone + every time I want to test the app out. I'm not sure if that will also work for + the VPN aspect of the app, but it will at least help me iterate on UI changes + faster. + +But at this point I'm done for the day; I'll continue on this project some other +time. + +[mobile_nebula]: https://github.com/DefinedNet/mobile_nebula +[nebula]: https://slack.engineering/introducing-nebula-the-open-source-global-overlay-network-from-slack/ +[dns_issue]: https://github.com/DefinedNet/mobile_nebula/issues/9 +[arch]: https://archlinux.org/ +[android_wiki]: https://wiki.archlinux.org/index.php/Android#Making_/opt/android-sdk_group-writeable +[heilung]: https://youtu.be/SMJ7pxqk5d4?t=220 +[flutter_blog]: https://www.rockyourcode.com/how-to-get-flutter-and-android-working-on-arch-linux/ +[gomobile]: https://pkg.go.dev/golang.org/x/mobile/cmd/gomobile +[silver_searcher]: https://github.com/ggreer/the_silver_searcher +[flutter_issue]: https://github.com/flutter/flutter/issues/58247 +[usb_debugging]: https://www.droidviews.com/how-to-enable-developer-optionsusb-debugging-mode-on-devices-with-android-4-2-jelly-bean/ diff --git a/static/src/_posts/2021-02-06-old-code-new-ideas.md b/static/src/_posts/2021-02-06-old-code-new-ideas.md new file mode 100644 index 0000000..c495da0
--- /dev/null +++ b/static/src/_posts/2021-02-06-old-code-new-ideas.md @@ -0,0 +1,224 @@ +--- +title: >- + Old Code, New Ideas +description: >- + Looking back at my old code with bemusement and horror. +tags: tech +--- + +About 3 years ago I put a lot of effort into a set of golang packages called +[mediocre-go-lib][mediocre-go-lib]. The idea was to create a framework around +the ideas I had laid out in [this blog post][program-structure] around the +structure and composability of programs. What I found in using the framework was +that it was quite bulky, not fully thought out, and ultimately difficult for +anyone but me to use. So.... a typical framework then. + +My ideas about program structure haven't changed a ton since then, but my ideas +around the patterns which enable that structure have simplified dramatically +(see [my more recent post][component-oriented] for more on that). So in that +spirit I've decided to cut a `v2` branch of `mediocre-go-lib` and start trimming +the fat. + +This is going to be an exercise both in deleting old code (very fun) and +re-examining old code which I used to think was good but now know is bad (even +more fun), and I've been looking forward to it for some time. + +[mediocre-go-lib]: https://github.com/mediocregopher/mediocre-go-lib +[program-structure]: {% post_url 2019-08-02-program-structure-and-composability %} +[component-oriented]: {% post_url 2020-11-16-component-oriented-programming %} + +## mcmp, mctx + +The two foundational pieces of `mediocre-go-lib` are the `mcmp` and `mctx` +packages. `mcmp` primarily deals with its [mcmp.Component][component] type, +which is a key/value store which can be used by other packages to store and +retrieve component-level information. Each `mcmp.Component` exists as a node in +a tree of `mcmp.Component`s, and these form the structure of a program. +`mcmp.Component` is able to provide information about its place in that tree as +well (i.e. its path, parents, children, etc...). 
+ +If this sounds cumbersome and of questionable utility that's because it is. It's +also not even correct, because a component in a program exists in a DAG, not a +tree. Moreover, each component can keep track of whatever data it needs for +itself using typed fields on a struct. Pretty much all other packages in +`mediocre-go-lib` depend on `mcmp` to function, but they don't _need_ to, I just +designed it that way. + +So my plan of attack is going to be to delete `mcmp` completely, and repair all +the other packages. + +The other foundational piece of `mediocre-go-lib` is [mctx][mctx]. Where `mcmp` +dealt with arbitrary key/value storage on the component level, `mctx` deals with +it on the contextual level, where each go-routine (i.e. thread) corresponds to a +`context.Context`. The primary function of `mctx` is this one: + +```go +// Annotate takes in one or more key/value pairs (kvs' length must be even) and +// returns a Context carrying them. +func Annotate(ctx context.Context, kvs ...interface{}) context.Context +``` + +I'm inclined to keep this around for now because it will be useful for logging, +but there's one change I'd like to make to it. In its current form the value of +every key/value pair must already exist before being used to annotate the +`context.Context`, but this can be cumbersome in cases where the data you'd want +to annotate is quite hefty to generate but also not necessarily going to be +used. I'd like to have the option to make annotating occur lazily. For this I +add an `Annotator` interface and a `WithAnnotator` function which takes it as an +argument, as well as some internal refactoring to make it all work right: + +```go +// Annotations is a set of key/value pairs representing a set of annotations. It +// implements the Annotator interface along with other useful post-processing +// methods. +type Annotations map[interface{}]interface{} + +// Annotator is a type which can add annotation data to an existing set of +// annotations. 
The Annotate method should be expected to be called in a +// non-thread-safe manner. +type Annotator interface { + Annotate(Annotations) +} + +// WithAnnotator takes in an Annotator and returns a Context which will produce +// that Annotator's annotations when the Annotations function is called. The +// Annotator will not be evaluated until the first call to Annotations. +func WithAnnotator(ctx context.Context, annotator Annotator) context.Context +``` + +`Annotator` is designed like it is for two reasons. The more obvious design, +where the method has no arguments and returns a map, would cause a memory +allocation on every invocation, which could be a drag for long chains of +contexts whose annotations are being evaluated frequently. The obvious design +also leaves open questions about whether the returned map can be modified by +whoever receives it. The design given here dodges these problems without any +obvious drawbacks. + +The original implementation also had this unnecessary `Annotation` type: + +```go +// Annotation describes the annotation of a key/value pair made on a Context via +// the Annotate call. +type Annotation struct { + Key, Value interface{} +} +``` + +I don't know why this was ever needed, as an `Annotation` was never passed into +nor returned from any function. It was part of the type `AnnotationSet`, but +that could easily be refactored into a `map[interface{}]interface{}` instead. So +I factored `Annotation` out completely. + +[component]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mcmp#Component +[mctx]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mctx + +## mcfg, mrun + +The next package to tackle is [mcfg][mcfg], which deals with configuration via +command line arguments and environment variables. The package is set up to use +the old `mcmp.Component` type such that each component could declare its own +configuration parameters in the global configuration. 
In this way the +configuration would have a hierarchy of its own which matches the component +tree. + +Given that I now think `mcmp.Component` isn't the right course of action it +would be the natural step to take that aspect out of `mcfg`, leaving only a +basic command-line and environment variable parser. There are many other basic +parsers of this sort out there, including [one][flagconfig] or [two][lever] I +wrote myself, and frankly I don't think the world needs another. So `mcfg` is +going away. + +The [mrun][mrun] package is the corresponding package to `mcfg`; where `mcfg` +dealt with configuration of components `mrun` deals with the initialization and +shutdown of those same components. Like `mcfg`, `mrun` relies heavily on +`mcmp.Component`, and doesn't really have any function with that type gone. So +`mrun` is a goner too. + +[mcfg]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mcfg +[mrun]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mrun +[flagconfig]: https://github.com/mediocregopher/flagconfig +[lever]: https://github.com/mediocregopher/lever + +## mlog + +The [mlog][mlog] package is primarily concerned with, as you might guess, +logging. While there are many useful logging packages out there none of them +integrate with `mctx`'s annotations, so it is useful to have a custom logging +package here. `mlog` also has the nice property of not being extremely coupled +to `mcmp.Component` like other packages; it's only necessary to delete a handful +of global functions which aren't a direct part of the `mlog.Logger` type in +order to free the package from that burden. + +With that said, the `mlog.Logger` type could still use some work. Its primary +pattern looks like this: + +```go +// Message describes a message to be logged. +type Message struct { + Level + Description string + Contexts []context.Context +} + +// Info logs an InfoLevel message. 
+func (l *Logger) Info(descr string, ctxs ...context.Context) { + l.Log(mkMsg(InfoLevel, descr, ctxs...)) +} +``` + +The idea was that if the user has multiple `Contexts` in hand, each one possibly +having some relevant annotations, all of those `Context`s' annotations could be +merged together for the log entry. + +Looking back it seems to me that the only thing `mlog` should care about is the +annotations, and not _where_ those annotations came from. So the new pattern +looks like this: + +```go +// Message describes a message to be logged. +type Message struct { + Context context.Context + Level + Description string + Annotators []mctx.Annotator +} + +// Info logs a LevelInfo message. +func (l *Logger) Info(ctx context.Context, descr string, annotators ...mctx.Annotator) +``` + +The annotations on the given `Context` will be included, and then any further +`Annotator`s can be added on. This will leave room for `merr` later. + +There's some other warts in `mlog.Logger` that should be dealt with as well, +including some extraneous methods which were only used due to `mcmp.Component`, +some poorly named types, a message handler which didn't properly clean itself +up, and making `NewLogger` take in parameters with which it can be customized as +needed (previously it only allowed for a single configuration). I've also +extended `Message` to include a timestamp, a namespace field, and some other +useful information. + +[mlog]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mlog + +## Future Work + +I've run out of time for today, but future work on this package includes: + +* Updating [merr][merr] with support for `mctx.Annotations`. +* Auditing the [mnet][mnet], [mhttp][mhttp], and [mrpc][mrpc] packages to see if + they contain anything worth keeping. +* Probably deleting the [m][m] package entirely; I don't even really remember + what it does. +* Probably deleting the [mdb][mdb] package entirely; it only makes sense in the + context of `mcmp.Component`. 
+* Making a difficult decision about [mtest][mtest]; I put a lot of work into it, + but is it really any better than [testify][testify]? + +[merr]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/merr +[mnet]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mnet +[mhttp]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mhttp +[mrpc]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mrpc +[m]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/m +[mdb]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mdb +[mtest]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mtest +[testify]: https://github.com/stretchr/testify diff --git a/static/src/_posts/2021-02-13-building-gomobile-using-nix.md b/static/src/_posts/2021-02-13-building-gomobile-using-nix.md new file mode 100644 index 0000000..3326266 --- /dev/null +++ b/static/src/_posts/2021-02-13-building-gomobile-using-nix.md @@ -0,0 +1,232 @@ +--- +title: >- + Building gomobile Using Nix +description: >- + Harder than I thought it would be! +series: nebula +tags: tech +--- + +When I last left off with the nebula project I wanted to [nix][nix]-ify the +build process for Cryptic's [mobile_nebula][mobile_nebula] fork. While I've made +progress on the overall build, one particular bit of it really held me up, so +I'm writing about that part here. I'll finish the full build at a later time. + +## gomobile + +[gomobile][gomobile] is a toolkit for the go programming language to allow for +running go code on Android and iOS devices. `mobile_nebula` uses `gomobile` to +build a simple wrapper around the nebula client that the mobile app can then +hook into. + +This means that in order to nix-ify the entire `mobile_nebula` project I first +need to nix-ify `gomobile`, and since there isn't (at time of writing) an +existing package for `gomobile` in the nixpkgs repo, I had to roll my own. 
+ +I started with a simple `buildGoModule` nix expression: + +``` +pkgs.buildGoModule { + pname = "gomobile"; + version = "unstable-2020-12-17"; + src = pkgs.fetchFromGitHub { + owner = "golang"; + repo = "mobile"; + rev = "e6ae53a27f4fd7cfa2943f2ae47b96cba8eb01c9"; + sha256 = "03dzis3xkj0abcm4k95w2zd4l9ygn0rhkj56bzxbcpwa7idqhd62"; + }; + vendorSha256 = "1n1338vqkc1n8cy94501n7jn3qbr28q9d9zxnq2b4rxsqjfc9l94"; +} +``` + +The basic idea here is that `buildGoModule` will acquire a specific revision of +the `gomobile` source code from github, then attempt to build it. However, +`gomobile` is a special beast in that it requires a number of C/C++ libraries in +order to be built. I discovered this upon running this expression, when I +received this error: + +``` +./work.h:12:10: fatal error: GLES3/gl3.h: No such file or directory + 12 | #include <GLES3/gl3.h> // install on Ubuntu with: sudo apt-get install libegl1-mesa-dev libgles2-mesa-dev libx11-dev +``` + +This stumped me for a bit, as I couldn't figure out a) the "right" place to +source the `GLES3` header file from, and b) how to properly hook that into the +`buildGoModule` expression. My initial attempts involved trying to include +versions of the header file from my `androidsdk` nix package which I had already +gotten (mostly) working, but the version which ships there appears to expect to +be using clang. `cgo` (go's compiler which is used for C/C++ interop) only +supports gcc, so that strategy failed. + +I didn't like having to import the header file from `androidsdk` anyway, as it +meant that my `gomobile` would only work within the context of the +`mobile_nebula` project, rather than being a standalone utility. + +## nix-index + +At this point I flailed around some more trying to figure out where to get this +header file from. 
Eventually I stumbled on the [nix-index][nix-index] project, +which implements something similar to the `locate` utility on linux: you give it +a file pattern, and it searches your active nix channels for any packages which +provide a file matching that pattern. + +Since nix is amazing it's not actually necessary to install `nix-index`, I +simply start up a shell with the package available using `nix-shell -p +nix-index`. On first run I needed to populate the index by running the +`nix-index` command, which took some time, but after that finding packages which +provide the file I need is as easy as: + +``` +> nix-shell -p nix-index +[nix-shell:/tmp]$ nix-locate GLES3/gl3.h +(zulip.out) 82,674 r /nix/store/wbfw7w2ixdp317wip77d4ji834v1k1b9-libglvnd-1.3.2-dev/include/GLES3/gl3.h +libglvnd.dev 82,674 r /nix/store/pghxzmnmxdcarg5bj3js9csz0h85g08m-libglvnd-1.3.2-dev/include/GLES3/gl3.h +emscripten.out 82,666 r /nix/store/x3c4y2h5rn1jawybk48r6glzs1jl029s-emscripten-2.0.1/share/emscripten/system/include/GLES3/gl3.h +``` + +So my mystery file is provided by a few packages, but `libglvnd.dev` stood out +to me as it's also the pacman package which provides the same file in my real +operating system: + +``` +> yay -Qo /usr/include/GLES3/gl3.h +/usr/include/GLES3/gl3.h is owned by libglvnd 1.3.2-1 +``` + +This gave me some confidence that this was the right track. + +## cgo + +My next fight was with `cgo` itself. Go's build process provides a few different +entry points for C/C++ compiler/linker flags, including both environment +variables and command-line arguments. But I wasn't using `go build` directly, +instead I was working through nix's `buildGoModule` wrapper. This added a huge +layer of confusion as all of nixpkgs is pretty terribly documented, so you +really have to just divine behavior from the [source][buildGoModule-source] +(good luck). 
+ +After lots of debugging (hint: `NIX_DEBUG=1`) I determined that all which is +actually needed is to set the `CGO_CFLAGS` variable within the `buildGoModule` +arguments. This would translate to the `CGO_CFLAGS` environment variable being +set during all internal commands, and whatever `go build` commands get used +would pick up my compiler flags from that. + +My new nix expression looked like this: + +``` +pkgs.buildGoModule { + pname = "gomobile"; + version = "unstable-2020-12-17"; + src = pkgs.fetchFromGitHub { + owner = "golang"; + repo = "mobile"; + rev = "e6ae53a27f4fd7cfa2943f2ae47b96cba8eb01c9"; + sha256 = "03dzis3xkj0abcm4k95w2zd4l9ygn0rhkj56bzxbcpwa7idqhd62"; + }; + vendorSha256 = "1n1338vqkc1n8cy94501n7jn3qbr28q9d9zxnq2b4rxsqjfc9l94"; + + CGO_CFLAGS = [ + "-I ${pkgs.libglvnd.dev}/include" + ]; +} +``` + +Running this produced a new error. Progress! The new error was: + +``` +/nix/store/p792j5f44l3f0xi7ai5jllwnxqwnka88-binutils-2.31.1/bin/ld: cannot find -lGLESv2 +collect2: error: ld returned 1 exit status +``` + +So pretty similar to the previous issue, but this time the linker wasn't finding +a library file rather than the compiler not finding a header file. Once again I +used `nix-index`'s `nix-locate` command to find that this library file is +provided by the `libglvnd` package (as opposed to `libglvnd.dev`, which provided +the header file). + +Adding `libglvnd` to the `CGO_CFLAGS` did not work, as it turns out that flags +for the linker `cgo` uses get passed in via `CGO_LDFLAGS` (makes sense). After +adding this new variable I got yet another error; this time `X11/Xlib.h` was not +able to be found. I repeated the process of `nix-locate`/add to `CGO_*FLAGS` a +few more times until all dependencies were accounted for. 
The new nix expression +looked like this: + +``` +pkgs.buildGoModule { + pname = "gomobile"; + version = "unstable-2020-12-17"; + src = pkgs.fetchFromGitHub { + owner = "golang"; + repo = "mobile"; + rev = "e6ae53a27f4fd7cfa2943f2ae47b96cba8eb01c9"; + sha256 = "03dzis3xkj0abcm4k95w2zd4l9ygn0rhkj56bzxbcpwa7idqhd62"; + }; + vendorSha256 = "1n1338vqkc1n8cy94501n7jn3qbr28q9d9zxnq2b4rxsqjfc9l94"; + + CGO_CFLAGS = [ + "-I ${pkgs.libglvnd.dev}/include" + "-I ${pkgs.xlibs.libX11.dev}/include" + "-I ${pkgs.xlibs.xorgproto}/include" + "-I ${pkgs.openal}/include" + ]; + + CGO_LDFLAGS = [ + "-L ${pkgs.libglvnd}/lib" + "-L ${pkgs.xlibs.libX11}/lib" + "-L ${pkgs.openal}/lib" + ]; +} +``` + +## Tests + +The `CGO_*FLAGS` variables took care of all compiler/linker errors, but there +was one issue left: `buildGoModule` apparently runs the project's tests after +the build phase. `gomobile`'s tests were actually mostly passing, but some +failed due to trying to copy files around, which nix was having none of. After +some more [buildGoModule source][buildGoModule-source] divination I found that +if I passed an empty `checkPhase` argument it would skip the check phase, and +therefore skip running these tests. + +## Fin! 
+ +The final nix expression looks like so: + +``` +pkgs.buildGoModule { + pname = "gomobile"; + version = "unstable-2020-12-17"; + src = pkgs.fetchFromGitHub { + owner = "golang"; + repo = "mobile"; + rev = "e6ae53a27f4fd7cfa2943f2ae47b96cba8eb01c9"; + sha256 = "03dzis3xkj0abcm4k95w2zd4l9ygn0rhkj56bzxbcpwa7idqhd62"; + }; + vendorSha256 = "1n1338vqkc1n8cy94501n7jn3qbr28q9d9zxnq2b4rxsqjfc9l94"; + + CGO_CFLAGS = [ + "-I ${pkgs.libglvnd.dev}/include" + "-I ${pkgs.xlibs.libX11.dev}/include" + "-I ${pkgs.xlibs.xorgproto}/include" + "-I ${pkgs.openal}/include" + ]; + + CGO_LDFLAGS = [ + "-L ${pkgs.libglvnd}/lib" + "-L ${pkgs.xlibs.libX11}/lib" + "-L ${pkgs.openal}/lib" + ]; + + checkPhase = ""; +} +``` + +Once I complete the nix-ification of `mobile_nebula` I'll submit a PR to the +nixpkgs upstream with this, so that others can have `gomobile` available as +well! + +[nix]: https://nixos.org/manual/nix/stable/ +[mobile_nebula]: https://github.com/cryptic-io/mobile_nebula +[gomobile]: https://github.com/golang/mobile +[nix-index]: https://github.com/bennofs/nix-index +[buildGoModule-source]: https://github.com/NixOS/nixpkgs/blob/26117ed4b78020252e49fe75f562378063471f71/pkgs/development/go-modules/generic/default.nix diff --git a/static/src/_posts/2021-02-25-married.md b/static/src/_posts/2021-02-25-married.md new file mode 100644 index 0000000..a44e044 --- /dev/null +++ b/static/src/_posts/2021-02-25-married.md @@ -0,0 +1,18 @@ +--- +title: >- + Married! +description: >- + We did it! +--- + +Just us, an aspen grove, and a photographer to witness. Between weather and +foot-traffic on the trail everything went as well as we hoped it would; it was a +wonderful day. + +{% include image.html dir="wedding" file="1.jpg" width=4005 %} + +{% include image.html dir="wedding" file="2.jpg" width=4004 %} + +{% include image.html dir="wedding" file="3.jpg" width=4005 %} + +More pictures coming soon to a website near you! 
diff --git a/static/src/_posts/2021-03-01-conditionals-in-ginger.md b/static/src/_posts/2021-03-01-conditionals-in-ginger.md new file mode 100644 index 0000000..a8c6e44 --- /dev/null +++ b/static/src/_posts/2021-03-01-conditionals-in-ginger.md @@ -0,0 +1,195 @@ +--- +title: >- + Conditionals in Ginger +description: >- + Some different options for how "if" statements could work. +series: ginger +tags: tech +--- + +In the [last ginger post][last] I covered a broad overview of how I envisioned +ginger would work as a language, but there were two areas where I felt there was +some uncertainty: conditionals and loops. In this post I will be focusing on +conditionals, and going over a couple of options for how they could work. + +[last]: {% post_url 2021-01-09-ginger %} + +## Preface + +By "conditional" I'm referring to what programmers generally know as the "if" +statement; some mechanism by which code can do one thing or another based on +circumstances at runtime. Without some form of a conditional a programming +language is not Turing-complete and can't be used for anything interesting. + +Given that it's uncommon to have a loop without some kind of a conditional +inside of it (usually to exit the loop), but it's quite common to have a +conditional with no loop in sight, it makes more sense to cover conditionals +before loops. Whatever decision is reached regarding conditionals will impact +how loops work, but not necessarily the other way around. + +For the duration of this post I will be attempting to construct a simple +operation which takes two integers as arguments. If the first is less than +the second then the operation returns the addition of the two, otherwise the +operation returns the first subtracted from the second. 
In `go` this operation +would look like: + +```go +func op(a, b int) int { + if a < b { + return a + b + } + return b - a +} +``` + +## Pattern 1: Branches As Inputs + +The pattern I'll lay out here is simultaneously the first pattern which came to +me when trying to figure this problem out, the pattern which is most like +existing mainstream programming languages, and (in my opinion) the worst pattern +of the bunch. Here is what it looks like: + +``` + in -lt-> } -if-> out + } + in -add-> } + } +in -1-> } } +in -0-> } -sub-> } + +``` + +The idea here is that the operation `if` could take a 3-tuple whose elements +are, respectively: a boolean, and two other edges which won't be evaluated until +`if` is evaluated. If the boolean is true then `if` outputs the output of the +first edge (the second element in the tuple), and otherwise it will output the +value of the second edge. + +This idea doesn't work for a couple reasons. The biggest is that, if there were +multiple levels of `if` statements, the structure of the graph grows out +_leftward_, whereas the flow of data is rightwards. For someone reading the code +to know what `if` will produce in either case they must first backtrack through +the graph, find the origin of that branch, then track that leftward once again +to the `if`. + +The other reason this doesn't work is because it doesn't jive with any pattern +for loops I've come up with. This isn't evident from this particular example, +but consider what this would look like if either branch of the `if` needed to +loop back to a previous point in the codepath. If that's a difficult or +confusing task for you, you're not alone. + +## Pattern 2: Pattern Matching + +There's quite a few languages with pattern matching, and even one which I know +of (erlang) where pattern matching is the primary form of conditionals, and the +more common `if` statement is just some syntactic sugar on top of the pattern +matching. + +I've considered pattern matching for ginger. 
It might look something like: + +{% raw %} +``` + in -> } -switch-> } -> {{{A, B}, _}, ({A,B}-lt->out)} -0-> } -add-> out +in -1-> } -> } } -1-> } -sub-> out +in -0-> } +``` +{% endraw %} + +The `switch` operation posits that a node can have multiple output edges. In a +graph this is fine, but it's worth noting. Graphs tend to be implemented such +that edges to and from a node are unordered, but in ginger it seems unlikely +that that will be the case. + +The last output edge from the switch is the easiest to explain: it outputs the +input value to `switch` when no other branches are able to be taken. But the +input to `switch` is a bit complex in this example: It's a 2-tuple whose first +element is `in`, and whose second element is `in` but with reversed elements. +In the last output edge we immediately pipe into a `1` operation to retrieve +that second element and call `sub` on that, since that's the required behavior +of the example. + +All other branches (in this switch there is only one, the first branch) output +to a value. The form of this value is a tuple (denoted by enclosed curly braces +here) of two values. The first value is the pattern itself, and the second is an +optional predicate. The pattern in this example will match a 2-tuple, ignoring +the second element in that tuple. The first element will itself be matched +against a 2-tuple, and assign each element to the variables `A` and `B`, +respectively. The second element in the tuple, the predicate, is a sub-graph +which returns a boolean, and can be used for further specificity which can't be +covered by the pattern matching (in this case, comparing the two values to each +other). + +The output from any of `switch`'s branches is the same as its input value, the +only question is which branch is taken. This means that there's no backtracking +when reading a program using this pattern; no matter where you're looking you +will only have to keep reading rightward to come to an `out`. 
+ +There's a few drawbacks with this approach. The first is that it's not actually +very easy to read. While pattern matching can be a really nice feature in +languages that design around it, I've never seen it used in a LISP-style +language where the syntax denotes actual datastructures, and I feel that in such +a context it's a bit unwieldy. I could be wrong. + +The second drawback is that pattern matching is not simple to implement, and I'm +not even sure what it would look like in a language where graphs are the primary +datastructure. In the above example we're only matching into a tuple, but how +would you format the pattern for a multi-node, multi-edge graph? Perhaps it's +possible. But given that any such system could be implemented as a macro on top +of normal `if` statements, rather than doing it the other way around, it seems +better to start with the simpler option. + +(I haven't talked about it yet, but I'd like for ginger to be portable to +multiple backends (i.e. different processor architectures, vms, etc). If the +builtins of the language are complex, then doing this will be a difficult task, +whereas if I'm conscious of that goal during design I think it can be made to be +very simple. In that light I'd prefer to not require pattern matching to be a +builtin.) + +The third drawback is that the input to the `switch` requires careful ordering, +especially in cases like this one where a different value is needed depending on +which branch is taken. I don't consider this to be a huge drawback, as it +encourages good data design and is a common consideration in other functional +languages. 
+ +## Pattern 3: Branches As Outputs + +Taking a cue from the pattern matching example, we can go back to `if` and take +advantage of multiple output edges being a possibility: + +``` + in -> } -> } -if-> } -0-> } -add-> out +in -1-> } -> } } } -1-> } -sub-> out +in -0-> } } + } + in -lt-> } +``` + +It's not perfect, but I'd say this is the nicest of the three options so far. +`if` is an operation which takes a 2-tuple. The second element of the tuple is a +boolean, if the boolean is true then `if` passes the first element of its tuple +to the first branch, otherwise it passes it to the second. In this way `if` +becomes kind of like a fork in a train track: it accepts some payload (the first +element of its input tuple) and depending on conditions (the second element) it +directs the payload one way or the other. + +This pattern retains the benefits of the pattern matching example, where one +never needs to backtrack in order to understand what is about to happen next, +while also being much more readable and simpler to implement. It also retains +one of the drawbacks of the pattern matching example, in that the inputs to `if` +must be carefully organized based on the needs of the output branches. As +before, I don't consider this to be a huge drawback. + +There's other modifications which might be made to this `if` to make it even +cleaner, e.g. one could make it accept a 3-tuple, rather than a 2-tuple, in +order to supply differing values to be used depending on which branch is taken. +To me these sorts of small niceties are better left to be implemented as macros, +built on top of a simpler but less pleasant builtin. + +## Fin + +If you have other ideas around how conditionals might be done in a graph-based +language please [email me][email]; any and all contributions are welcome! One +day I'll get around to actually implementing some of ginger, but today is not +that day. 
+ +[email]: mailto:mediocregopher@gmail.com diff --git a/static/src/_posts/2021-03-04-conditionals-in-ginger-errata.md b/static/src/_posts/2021-03-04-conditionals-in-ginger-errata.md new file mode 100644 index 0000000..b4c0007 --- /dev/null +++ b/static/src/_posts/2021-03-04-conditionals-in-ginger-errata.md @@ -0,0 +1,195 @@ +--- +title: >- + Conditionals in Ginger, Errata +description: >- + Too clever by half. +series: ginger +tags: tech +--- + +After publishing the last post in the series I walked away from my computer +feeling that I was very clever and had made a good post. This was incorrect. + +To summarize [the previous post][prev], it's not obvious which is the best way +to structure conditionals in a graphical programming language. My favorite +solution looked something like this: + +``` + in -> } -> } -if-> } -0-> } -add-> out +in -1-> } -> } } } -1-> } -sub-> out +in -0-> } } + } + in -lt-> } +``` + +Essentially an `if` operator which accepts a value and a boolean, and which has +two output edges. If the boolean is true then the input value is sent along the +first output edge, and if it's false it's sent along the second. + +This structure is not possible, given the properties of ginger graphs that have +been laid out in [other posts in the series][other]. + +## Nodes, Tuples, and Edges + +A ginger graph, as it has been presented so far, is composed of these three +elements. A node has a value, and its value is unique to the graph; if two nodes +have the same value then they are the same node. Edges connect two nodes or +tuples together, and have a value and direction. Tuples are, in essence, a node +whose value is its input edges. + +The `if` operation above lies on an edge, not a node or tuple. It cannot have +multiple output edges, since it cannot have any edges at all. It _is_ an edge. + +So it's back to the drawing board, to some extent. But luckily I've got some +more ideas in my back pocket. 
+ +## Forks and Junctions + +In an older conception of ginger there was no tuple, but instead there were +forks and junctions. A junction was essentially the same as a tuple, just named +differently: a node whose value is its input edges. A fork was just the +opposite, a node whose value is its output edges. Junctions and forks naturally +complemented each other, but ultimately I didn't find forks to be useful for +much because there weren't cases where it was necessary to have a single edge be +split across multiple output edges directly; any case which appeared to require +a fork could be satisfied by directing the edge into a 1-tuple and using the +output edges of the 1-tuple. + +But now we have such a case. The 1-tuple won't work, because the `if` operator +would only see the 1-tuple, not its edges. It could be supposed that the graph +interpreter could say that an `if` operation must be followed by a 1-tuple, and +that the 1-tuple's output edges have a special meaning in that circumstance. But +making the output edges of a 1-tuple have different meaning in different +circumstances isn't very elegant. + +So a fork might be just the thing here. For the example I will represent a +fork as the opposite of a tuple: a vertical column of `{` characters. + +``` + in -> } -> } -if-> { -0-> } -add-> out +in -1-> } -> } } { -1-> } -sub-> out +in -0-> } } + } + in -lt-> } +``` + +It _looks_ elegant, which is nice. I am curious though if there's any other +possible use-case where a fork might be useful... if there's not then it seems +odd to introduce an entire new element just to support a single operation. Why +not just make that operation itself the new element? + +## Switch it Up + +In most conceptions of a flowchart that I've seen a conditional is usually +represented as a node with a different shape than the other nodes (often a +diamond). Ginger could borrow this idea for itself, and declare a new graph +element, alongside nodes, tuples, and edges, called a switch. 
+ +Let's say a switch is simply represented by a `-<>`, and acts like a node in all +aspects except that it has no value and is not unique to the graph. + +The example presented in the [previous post][prev] would look something like +this: + +``` + in -> } -> } -<> -0-> } -add-> out +in -1-> } -> } } -1-> } -sub-> out +in -0-> } } + } + in -lt-> } +``` + +This isn't the _worst_. Like the fork it's adding a new element, but that +element's existence is required and its usage is very specific to that +requirement, whereas the fork's existence is required but ambiguously useful +outside of that requirement. + +On the other hand, there are macros to consider... + +## Macrophillic + +Ginger will certainly support macros, and as alluded to in the last post I'd +like even conditional operations to be fair game for those who want to construct +their own more complex operators. In the context of the switch `-<>` element, +would someone be able to create something like a pattern matching conditional? +If the builtin conditional is implemented as a new graph element then it seems +that the primary way to implement a custom conditional macro will also involve a +new graph element. + +While I'm not flat out opposed to allowing for custom graph elements, I'm +extremely skeptical that it's necessary, and would like it to be proven +necessary before considering it. So if we can have a basic conditional, _and_ +custom conditional macros built on top of the same broadly useful element, that +seems like the better strategy. + +So all of that said, it seems I'm leaning towards forks as the better strategy +in this. But I'd like a different name. "Fork" was nice as being the complement +of a "junction", but I like "tuple" way more than "junction" because the term +applies well both to the structural element _and_ to the transformation that +element performs (i.e. a tuple element combines its input edges' values into a +tuple value). But "tuple" and "fork" seem weird together...
+ +## Many Minutes Later... + +A brief search of the internet reveals no better word than "fork". A place +where a tree's trunk splits into two separate trunks is called a "fork". A +place where a river splits into two separate rivers is called a "fork". +Similarly with roads. And that _is_ what's happening, from the point of view of +the graph's structure: it is an element whose only purpose is to denote multiple +outward edges. + +So "fork" it is. + +## Other considerations + +A 1-tuple is interesting in that it acts essentially as a concatenation of two +edges. A 1-fork could, theoretically, do the same thing: + +``` +a -foo-> } -bar-> b + +c -far-> { -boo-> d +``` + +The top uses a tuple, the bottom a fork. Each is, conceptually, valid, but I +don't like that two different elements can be used for the exact same use-case. + +A 1-tuple is an established concept in data structures, so I am loath to give it +up. A 1-fork, on the other hand, doesn't make sense structurally (would you +point to any random point on a river and call it a "1-fork"?), and fork as a +whole doesn't really have any analog in the realm of data structures. So I'm +prepared to declare 1-forks invalid from the viewpoint of the language +interpreter. + +Another consideration: I already expect that there's going to be confusion as to +when to use a fork and when to use multiple outputs from a node. For example, +here's a graph which uses a fork: + +``` +a -> { -op1-> foo + { -op2-> bar +``` + +and here's a graph which has multiple outputs from the same node: + +``` +a -op1-> foo + -op2-> bar +``` + +Each could be interpreted to mean the same thing: "set `foo` to the result of +passing `a` into `op1`, and set `bar` to the result of passing `a` into `op2`." +As with the 1-tuple vs 1-fork issue, we have another case where the same +task might be accomplished with two different patterns. This case is trickier +though, and I don't have as confident an answer. 
+ +I think an interim rule which could be put in place, subject to review later, is +that multiple edges from a node or tuple indicate that that same value is being +used for multiple operations, while a fork indicates something specific to the +operation on its input edge. It's not a pretty rule, but I think it will do. + +Stay tuned for next week when I realize that actually all of this is wrong and +we start over again! + +[prev]: {% post_url 2021-03-01-conditionals-in-ginger %} +[other]: {% post_url 2021-01-09-ginger %} diff --git a/static/src/_posts/2021-03-12-ripple-a-game.md b/static/src/_posts/2021-03-12-ripple-a-game.md new file mode 100644 index 0000000..e793e6e --- /dev/null +++ b/static/src/_posts/2021-03-12-ripple-a-game.md @@ -0,0 +1,311 @@ +--- +title: >- + Ripple: A Game +description: >- + Hop Till You Drop! +tags: tech +series: ripple +--- + +

+ Movement: Arrow keys or WASD
+ Jump: Space
+ Goal: Jump as many times as possible without touching a ripple!
+
+ Press Jump To Begin! +

+ + +Your browser doesn't support canvas. At this point in the world that's actually +pretty cool, well done! + + +Score: + 0 + + + + +_Do you have the patience to wait
+till your mud settles and the water is clear?_ + +## Backstory + +This is a game I originally implemented in lua, which you can find [here][orig]. +It's a fun concept that I wanted to show off again, as well as to see if I could +whip it up in an evening in javascript (I can!) + +Send me your high scores! I top out around 17. + +[orig]: https://github.com/mediocregopher/ripple diff --git a/static/src/_posts/2021-03-20-a-simple-rule-for-better-errors.md b/static/src/_posts/2021-03-20-a-simple-rule-for-better-errors.md new file mode 100644 index 0000000..30139fb --- /dev/null +++ b/static/src/_posts/2021-03-20-a-simple-rule-for-better-errors.md @@ -0,0 +1,227 @@ +--- +title: >- + A Simple Rule for Better Errors +description: >- + ...and some examples of the rule in action. +tags: tech +--- + +This post will describe a simple rule for writing error messages that I've +been using for some time and have found to be worthwhile. Using this rule I can +be sure that my errors are propagated upwards with everything needed to debug +problems, while not containing tons of extraneous or duplicate information. + +This rule is not specific to any particular language, pattern of error +propagation (e.g. exceptions, signals, simple strings), or method of embedding +information in errors (e.g. key/value pairs, formatted strings). + +I do not claim to have invented this system, I'm just describing it. + +## The Rule + +Without more ado, here's the rule: + +> A function sending back an error should not include information the caller +> could already know. + +Pretty simple, really, but the best rules are. Keeping to this rule will result +in error messages which, once propagated up to their final destination (usually +some kind of logger), will contain only the information relevant to the error +itself, with minimal duplication. + +This rule works in tandem with good encapsulation of function +behavior.
The caller of a function knows only the inputs to the function and, in +general terms, what the function is going to do with those inputs. If the +returned error only includes information outside of those two things then the +caller knows everything it needs to know about the error, and can continue on to +propagate that error up the stack (with more information tacked on if necessary) +or handle it in some other way. + +## Examples + +(For examples I'll use Go, but as previously mentioned this rule will be useful +in any other language as well.) + +Let's go through a few examples, to show the various ways that this rule can +manifest in actual code. + +**Example 1: Nothing to add** + +In this example we have a function which merely wraps a call to `io.Copy` for +two files: + +```go +func copyFile(dst, src *os.File) error { + _, err := io.Copy(dst, src) + return err +} +``` + +In this example there's no need to modify the error from `io.Copy` before +returning it to the caller. What would we even add? The caller already knows +which files were involved in the error, and that the error was encountered +during some kind of copy operation (since that's what the function says it +does), so there's nothing more to say about it. + +**Example 2: Annotating which step an error occurs at** + +In this example we will open a file, read its contents, and return them as a +string: + +```go +func readFile(path string) (string, error) { + f, err := os.Open(path) + if err != nil { + return "", fmt.Errorf("opening file: %w", err) + } + defer f.Close() + + contents, err := io.ReadAll(f) + if err != nil { + return "", fmt.Errorf("reading contents: %w", err) + } + + return string(contents), nil +} +``` + +In this example there are two different steps which could result in an error: +opening the file and reading its contents. If an error is returned then our +imaginary caller doesn't know which step the error occurred at. 
Using our rule +we can infer that it would be good to annotate at _which_ step the error is +from, so the caller is able to have a fuller picture of what went wrong. + +Note that each annotation does _not_ include the file path which was passed into +the function. The caller already knows this path, so an error being returned +back which reiterates the path is unnecessary. + +**Example 3: Annotating which argument was involved** + +In this example we will read two files using our function from example 2, and +return the concatenation of their contents as a string. + +```go +func concatFiles(pathA, pathB string) (string, error) { + contentsA, err := readFile(pathA) + if err != nil { + return "", fmt.Errorf("reading contents of %q: %w", pathA, err) + } + + contentsB, err := readFile(pathB) + if err != nil { + return "", fmt.Errorf("reading contents of %q: %w", pathB, err) + } + + return contentsA + contentsB, nil +} +``` + +Like in example 2 we annotate each error, but instead of annotating the action +we annotate which file path was involved in each error. This is because if we +simply annotated with the string `reading contents` like before it wouldn't be +clear to the caller _which_ file's contents couldn't be read. Therefore we +include which path the error is relevant to. + +**Example 4: Layering** + +In this example we will show how using this rule habitually results in easy to +read errors which contain all relevant information surrounding the error. Our +example reads one file, the "full" file, using our `readFile` function from +example 2. It then reads the concatenation of two files, the "split" files, +using our `concatFiles` function from example 3. 
It finally determines if the +two strings are equal: + +```go +func verifySplits(fullFilePath, splitFilePathA, splitFilePathB string) error { + fullContents, err := readFile(fullFilePath) + if err != nil { + return fmt.Errorf("reading contents of full file: %w", err) + } + + splitContents, err := concatFiles(splitFilePathA, splitFilePathB) + if err != nil { + return fmt.Errorf("reading concatenation of split files: %w", err) + } + + if fullContents != splitContents { + return errors.New("full file's contents do not match the split files' contents") + } + + return nil +} +``` + +As previously, we don't annotate the file paths for the different possible +errors, but instead say _which_ files were involved. The caller already knows +the paths, there's no need to reiterate them if there's another way of referring +to them. + +Let's see what our errors actually look like! We run our new function using the +following: + +```go + err := verifySplits("full.txt", "splitA.txt", "splitB.txt") + fmt.Println(err) +``` + +Let's say `full.txt` doesn't exist, we'll get the following error: + +``` +reading contents of full file: opening file: open full.txt: no such file or directory +``` + +The error is simple, and gives you everything you need to understand what went +wrong: while attempting to read the full file, during the opening of that file, +our code found that there was no such file. In fact, the error returned by +`os.Open` contains the name of the file, which goes against our rule, but it's +the standard library so what can ya do? + +Now, let's say that `splitA.txt` doesn't exist, then we'll get this error: + +``` +reading concatenation of split files: reading contents of "splitA.txt": opening file: open splitA.txt: no such file or directory +``` + +Now we did include the file path here, and so the standard library's failure to +follow our rule is causing us some repetition.
But overall, within the parts of +the error we have control over, the error is concise and gives you everything +you need to know what happened. + +## Exceptions + +As with all rules, there are certainly exceptions. The primary one I've found is +that certain helper functions can benefit from bending this rule a bit. For +example, if there is a helper function which is called to verify some kind of +user input in many places, it can be helpful to include that input value within +the error returned from the helper function: + +```go +func verifyInput(str string) error { + if err := check(str); err != nil { + return fmt.Errorf("input %q was bad: %w", str, err) + } + return nil +} +``` + +`str` is known to the caller so, according to our rule, we don't need to include +it in the error. But if you're going to end up wrapping the error returned from +`verifyInput` with `str` at every call site anyway it can be convenient to save +some energy and break the rule. It's a trade-off, convenience in exchange for +consistency. + +Another exception might be made with regards to stack traces. + +In the set of examples given above I tended to annotate each error being +returned with a description of where in the function the error was being +returned from. If your language automatically includes some kind of stack trace +with every error, and if you find that you are generally able to reconcile that +stack trace with actual code, then it may be that annotating each error site is +unnecessary, except when annotating actual runtime values (e.g. an input +string). + +As in all things with programming, there are no hard rules; everything is up to +interpretation and the specific use-case being worked on. That said, I hope what +I've laid out here will prove generally useful to you, in whatever way you might +try to use it. 
+ diff --git a/static/src/_posts/2021-04-01-fmail.md b/static/src/_posts/2021-04-01-fmail.md new file mode 100644 index 0000000..bdc473c --- /dev/null +++ b/static/src/_posts/2021-04-01-fmail.md @@ -0,0 +1,172 @@ +--- +title: >- + F-Mail +description: >- + If email didn't suck. +--- + +I'm down a blog post, so I'm going to try to make up some time on this one. + +Email is probably the oldest web technology which is widely recognized by the +general public. It predates WWW by about 15 years, and is fundamental to the way +we use the internet. + +It also really fucking sucks. + +## Thought Exercise + +Let's invent email all over again, for fun. We can take the good things from the +existing email paradigm, and replace the bad. Let's not worry about marketshare +and adoption strategies and all that annoying stuff either; after all, I need to +finish this post in like.... 20 minutes... tops. + +This new email will be called fmail. + +The basic idea of email is solid. It's mail, on the internet. We all understand +mail. You have a mailing address, I want to send you a thing. I pay someone else +to take my thing to you, and they have some mechanism for finding you just based +on your address. + +We're good so far. Let's get into the weeds. + +## Addresses + +Email addresses are... ok. There's a name and a domain. If you were sending a +physical package to a house with multiple residents you would include the name +of the recipient on the package, in addition to the address. With email the +domain part of the email corresponds to the house address, and the username +corresponds to the recipient's actual name. + +In this aspect, however, physical mail has email beat. If the package has a +correct name it can often be routed directly to its intended recipient. But it +doesn't _have_ to have a correct name. In fact it can have no name. In those +cases the residents of the address figure out amongst themselves what to do with +it. Maybe it's obvious who it's for, maybe not. 
In any case it's possible to +resolve these issues. + +Further, in physical mail the routing steps are declared right on the mail +container (box, envelope, etc). You can, generally, read the recipient address +from bottom to top to understand how to deliver it. Here's an example: + +``` +Homer +123 Fakie St +Springfield, IL 12345 +USA +``` + +Understanding the steps is simple enough. The package first needs to get to the +United States of America, then to Springfield, then to Fakie St, then to house +123 on Fakie St, and finally to the resident named "Homer" at that house. + +Let's incorporate these ideas into fmail, our new mythical internet mail system. + +In fmail the address isn't an inflexible `name@domain`. Instead the address is +composed of a sequence of `>` separated strings, each denoting an intended hop +in the route. For example: + +``` +sick-domain.com>brian>phone +``` + +The sender only needs to know how to route to the first hop in order to do its +duty. In this case it's a simple domain lookup, which would tell it an IP to +send the fmail message to. From there the receiving server would need to know +what to do with `brian` as a piece of routing information. Maybe it knows, and +can send the message along. Maybe it doesn't, in which case the mail might go to +a "lost and found" directory, where anyone on the fmail server could claim it. + +If the idea of a domain-wide "lost and found" sounds scary, consider that it +might not be so scary in a world where fmail servers are easy to self-host, and +so people actually do so. What would make it possible for fmail to be easy to +self-host? + +## Spam + +Spam has made both email and real mail almost unbearable. If I'm honest, it's +the daily chore of cleaning my two mail boxes that made me start thinking about +writing this post in the first place. With email the spam issue is particularly +egregious, because the entire email ecosystem, not just the experience of the +individual, is made worse by spam.
+ +If you want to know why it's hard to run your email server, the answer is +"because spam exists". You need to block the spam destined for your server, you +need to ensure someone isn't going to hack your server and send spam from it, +you need to convince other email servers that you're one of the good ones and +won't send spam, you need to pray your ISP even allows you to have an email +server (because they don't want to be seen as enabling spam). There's actual +_laws_ about email spam. + +The good news is, fmail has solved the spam problem completely. + +In fmail, all messages are rejected by default. It's a whitelist based access +control, unlike email's blacklist based one where anyone can send you anything +and it's up to you to reject what you don't want. + +How can this work? There's a couple different forms the whitelist can take, and +they all can work together in your fmail server's configuration. + +The primary one would be to check for some kind of cryptographic signature on +the message, declaring who it's from. If the message is from a list of configured +"good senders" then it's kept. This would be for friends, family, coworkers, +etc... Those you expect to hear from frequently who you actually want to hear +from. + +Building on this, each "good sender" could have a timeout associated with them, +if desired. This could be useful when signing up for a website which wants to +use fmail for authentication. You configure your fmail client (which of course +integrates nicely with a web browser to make this easy) to allow messages from +this sender only for a limited time, or only a limited number of messages from +them. This way the user can receive their fmail confirmation message, or +password reset or whatever, without being forever bothered by stupid marketing +emails. + +A secondary method of whitelisting might involve someone attaching some +cryptocurrency to their message as a peace offering of sorts.
It could be as +simple as a private key or signed transaction which would allow the receiver, if +they receive the message, to keep the money. It would be up to the fmail client +to allow configuration of which cryptos are accepted and how much crypto is +required, as well as ensuring that the money is still available to be received. +Only if all these requirements are met is the message allowed to be seen by a +human, otherwise it's dropped. + +There's probably other interesting mechanisms I haven't thought of. It would be +good for fmail servers to have a plugin system that allowed for extending +functionality like this as the users desire. + +## Encryption + +One thing email sorely lacks is end-to-end encryption. This is a difficult +problem for communication systems in general, because ultimately what it comes +down to is a hard requirement on a safe exchange of public keys, which requires +an existing trusted method of communication. + +I don't think fmail needs to re-invent this wheel. We've already established +that users will have some mechanism for sharing public keys (for whitelisting), +so really what this comes down to is having good UI around key management from +the start, and the stubbornness to establish e2e messages as the norm. + +What holds email back in this area isn't so much the lack of solutions (there +are many ways to do e2e encryption over email) but the need for supporting +plaintext emails out of concern for backwards compatibility, as well as the need +to support open mail boxes which can receive and send mail willy-nilly. If a +whitelist-based system is built from scratch with e2e messages always being the +default way of messaging others, and plaintext messages being something with big +scary warnings around it, I don't think there'd be an issue. + +## That's fmail + +That's it. There's not much to it, except you know... actually implementing it +(someone else do it, I don't have time). 
+ +There's a lot more that could be said about the email protocol and server/client +implementations themselves, but I think if one were to start from scratch on +fmail it would be enough to say this: there's a lot of good things to take from +email, and really what we need is to update the mindset around internet +messaging in general. We have almost 8 billion people on earth, a double digit +percentage of them have internet access, and we need to give users better +mechanisms for ensuring their messages are received the way each one +individually wants them to be. + +My dream of finishing this post in 20 minutes did not come to pass. It was more +like an hour. I'm getting faster though! diff --git a/static/src/_posts/2021-04-06-evaluation-of-network-filesystems.md b/static/src/_posts/2021-04-06-evaluation-of-network-filesystems.md new file mode 100644 index 0000000..b80eb8d --- /dev/null +++ b/static/src/_posts/2021-04-06-evaluation-of-network-filesystems.md @@ -0,0 +1,339 @@ +--- +title: >- + Evaluation of Network Filesystems +description: >- + There can only be one. +series: nebula +tags: tech +--- + +It's been a bit since updating my progress on what I've been lately calling the +"cryptic nebula" project. When I last left off I was working on building the +[mobile nebula][mobile_nebula] using [nix][nix]. For the moment I gave up on +that dream, as flutter and nix just _really_ don't get along and I don't want to +get too distracted on problems that aren't critical to the actual goal. + +Instead I'd like to pursue the next critical component of the system, and +that's a shared filesystem. The use-case I'm ultimately trying to achieve is: + +* All hosts communicate with each other via the nebula network. +* All hosts are personal machines owned by individuals, _not_ cloud VMs. +* A handful of hosts are always-on, or at least as always-on as can be achieved + in a home environment.
+* All hosts are able to read/write to a shared filesystem, which is mounted via + FUSE (or some other mechanism, though I can't imagine what) on their computer. +* Top-level directories within the shared filesystem can be restricted, so + that only a certain person (or host) can read/write to them. + +What I'm looking for is some kind of network filesystem, of which there are +_many_. This document will attempt to evaluate all relevant projects and come up +with the next steps. It may be that no project fits the bill perfectly, and that +I'm stuck either modifying an existing project to my needs or, if things are +looking really dire, starting a new project. + +The ultimate use-case here is something like a self-hosted, distributed [keybase +filesystem](https://book.keybase.io/docs/files); somewhere where individuals in +the cluster can back up their personal projects, share files with each other, +and possibly even be used as the base layer for more complex applications on +top. + +The individuals involved shouldn't have to deal with configuring their +distributed FS, either to read from it or add storage resources to it. Ideally +the FS process can be bundled together with the nebula process and run opaquely; +the user is just running their "cryptic nebula" process and everything else is +handled in the background. + +## Low Pass Filter + +There are some criteria for these projects that I'm not willing to compromise +on; these criteria will form a low pass filter which, hopefully, will narrow our +search appreciably. + +The network filesystem used by the cryptic nebula must: + +* Be able to operate over a nebula network (obviously). +* Be open-source. The license doesn't matter, as long as the code is available. +* Run on both Mac and Linux. +* Not require a third-party to function. +* Allows for a replication factor of 3. +* Supports sharding of data (ie each host need not have the entire dataset). 
+* Allow for mounting a FUSE filesystem in any hosts' machine to interact with + the network filesystem. +* Not run in the JVM, or any other VM which is memory-greedy. + +The last may come across as mean, but the reason for it is that I foresee the +network filesystem client running on users' personal laptops, which cannot be +assumed to have resources to spare. + +## Rubric + +Each criterion in the next set lies along a spectrum. Any project may meet one of +these criteria fully, partially, or not at all. For each criterion I assign a +point value according to how fully a project meets the criteria, and then sum up +the points to give the project a final score. The project with the highest final +score is not necessarily the winner, but this system should at least give some +good candidates for final consideration. + +The criteria, and their associated points values, are: + +* **Hackability**: is the source-code of the project approachable? + - 0: No + - 1: Kind of, and there's not much of a community. + - 2: Kind of, but there is an active community. + - 3: Yes + +* **Documentation**: is the project well documented? + - 0: No docs. + - 1: Incomplete or out-of-date docs. + - 2: Very well documented. + +* **Transience**: how does the system handle hosts appearing or disappearing? + - 0: Requires an automated system to be built to handle adding/removing + hosts. + - 1: Gracefully handled. + +* **Priority**: is it possible to give certain hosts priority when choosing + which will host/replicate some piece of data? + - 0: No. + - 1: Yes. + +* **Caching**: will hosts reading a file have that file cached locally for the + next reading (until the file is modified)? + - 0: No. + - 1: Yes. + +* **Conflicts**: if two hosts updated the same file at the same time, how is + that handled? + - 0: The file can no longer be updated. + - 1: One update clobbers the other, or both go through in an undefined + order. + - 2: One update is disallowed.
+ - 3: A copy of the file containing the "losing" update is created (ie: how + dropbox does it). + - 4: Strategy can be configured on the file/directory level. + +* **Consistency**: how does the system handle a file being changed frequently? + - 0: File changes must be propagated before subsequent updates are allowed (fully consistent). + - 1: Files are snapshotted at some large-ish interval (eventually consistent). + - 2: File state (ie content hash, last modified, etc) is propagated + frequently but contents are only fully propagated once the file has + "settled" (eventually consistent with debounce). + +* **POSIX**: how POSIX compliant is the mounted filesystem? + - 0: Only the most basic features are implemented. + - 1: Some extra features are implemented. + - 2: Fully POSIX compliant. + +* **Scale**: how many hosts can be a part of the cluster? + - 0: A finite number. + - 1: A finite number of dedicated hosts, infinite ephemeral. + - 2: Infinite hosts. + +* **Failure**: how does the system handle failures (network partitions, hosts + hanging, buggy client versions)? + - 0: Data loss. + - 1: Reads and writes are halted. + - 2: Reads are allowed but writes are halted. + - 3: System is partially read/write, except affected parts. + +* **Limitations**: are there limits on how big files can be, or how big + directories can be? + - 0: Files are limited to below 1TB in size. + - 1: Directories are limited to below 100,000 files. + - 2: No limits. + +* **Encryption**: how is data encrypted? + - 0: Not at all, DIY. + - 1: Encrypted at rest. + - 2: Per-user encryption. + +* **Permissions**: how are modifications to data restricted? + - 0: Not at all. + - 1: Permissions are only superficially enforced. + - 2: Fully enforced user/group restrictions, complex patterns, and/or POSIX ACLs. + +* **Administration**: how much administration is required for the system to + function? + - 0: Frequent. + - 1: Infrequent. + - 2: Essentially none.
+ +* **Simplicity**: how understandable is the system as a whole? + - 0: Very complex. + - 1: Understandable with some study. + - 2: Very simple, easy to predict. + +* **Visibility**: how much visibility is available into processes within the + system? + - 0: Total black box. + - 1: Basic logging. + - 2: CLI tooling. + - 3: Exportable metrics (e.g. prometheus). + +## Evaluations + +With the rubric defined, let's start actually working through our options! There +are many, many different possibilities, so this may not be an exhaustive list. + +### [Ceph](https://docs.ceph.com/en/latest/cephfs/index.html) + +> The Ceph File System, or CephFS, is a POSIX-compliant file system built on +> top of Ceph’s distributed object store, RADOS. CephFS endeavors to provide a +> state-of-the-art, multi-use, highly available, and performant file store for +> a variety of applications, including traditional use-cases like shared home +> directories, HPC scratch space, and distributed workflow shared storage. + +- Hackability: 2. Very active community, but it's C++. +- Documentation: 2. Hella docs, very daunting. +- Transience: 0. Adding hosts seems to require multiple configuration steps. +- Priority: 1. There is fine-tuning on a per-host basis. +- Caching: 1. Clients can cache both metadata and block data. +- Conflicts: 1. The FS behaves as much like a real FS as possible. +- Consistency: 0. System is CP. +- POSIX: 2. Fully POSIX compliant. +- Scale: 2. Cluster can grow without any real bounds. +- Failure: 3. There's no indication anywhere that Ceph goes into any kind of cluster-wide failure mode. +- Limitations: 2. There are performance considerations with large directories, but no hard limits. +- Encryption: 0. None to speak of. +- Permissions: 2. POSIX ACLs supported. +- Administration: 1. This is a guess, but Ceph seems to be self-healing in general, but still needs hand-holding in certain situations (adding/removing nodes, etc...) +- Simplicity: 0. 
There are many moving pieces, as well as many different kinds of processes and entities. +- Visibility: 3. Lots of tooling to dig into the state of the cluster, as well as a prometheus module. + +TOTAL: 22 + +#### Comments + +Ceph has been recommended to me by a few people. It is clearly a very mature +project, though that maturity has brought with it a lot of complexity. A lot of +the complexity of Ceph seems to be rooted in its strong consistency guarantees, +which I'm confident it fulfills well, but are not really needed for the +use-case I'm interested in. I'd prefer a simpler, eventually consistent, +system. It's also not clear to me that Ceph would even perform very well in my +use-case as it seems to want an actual datacenter deployment, with beefy +hardware and hosts which are generally close together. + +### [GlusterFS](https://docs.gluster.org/en/latest/) + +> GlusterFS is a scalable network filesystem suitable for data-intensive tasks +> such as cloud storage and media streaming. GlusterFS is free and open source +> software and can utilize common off-the-shelf hardware. + +- Hackability: 2. Mostly C code, but there is an active community. +- Documentation: 2. Good docs. +- Transience: 0. New nodes cannot add themselves to the pool. +- Priority: 0. Data is distributed based on consistent hashing algo, nothing else. +- Caching: 1. Docs mention client-side caching layer. +- Conflicts: 0. File becomes frozen, manual intervention is needed to save it. +- Consistency: 0. Gluster aims to be fully consistent. +- POSIX: 2. Fully POSIX compliant. +- Scale: 2. No apparent limits. +- Failure: 3. Clients determine on their own whether or not they have a quorum for a particular sub-volume. +- Limitations: 2. Limited by the file system underlying each volume, I think. +- Encryption: 2. Encryption can be done on the volume level, each user could have a private volume. +- Permissions: 2. 
ACL checking is enforced on the server-side, but requires syncing of users and group membership across servers. +- Administration: 1. Beyond adding/removing nodes the system is fairly self-healing. +- Simplicity: 1. There's only one kind of server process, and the configuration of volumes is well documented and straightforward. +- Visibility: 3. Prometheus exporter available. + +TOTAL: 23 + +#### Comments + +GlusterFS was my initial choice when I did a brief survey of DFSs for this +use-case. However, after further digging into it I think it will suffer the +same ultimate problem as CephFS: too much consistency for a wide-area +application like I'm envisioning. The need for syncing user/groups across +machines as actual system users is also cumbersome enough to make it not a +great choice. + +### [MooseFS](https://moosefs.com/) + +> MooseFS is a Petabyte Open Source Network Distributed File System. It is easy +> to deploy and maintain, highly reliable, fault tolerant, highly performing, +> easily scalable and POSIX compliant. +> +> MooseFS spreads data over a number of commodity servers, which are visible to +> the user as one resource. For standard file operations MooseFS acts like +> ordinary Unix-like file system. + +- Hackability: 2. All C code, pretty dense, but backed by a company. +- Documentation: 2. There's a giant PDF you can read through like a book. I + guess that's.... good? +- Transience: 0. Nodes must be added manually. +- Priority: 1. There's "Storage Classes". +- Caching: 1. Caching is done on the client, and there's some synchronization + with the master server around it. +- Conflicts: 1. Both update operations will go through. +- Consistency: 0. Afaict it's a fully consistent system, with a master server + being used to synchronize changes. +- POSIX: 2. Fully POSIX compliant. +- Scale: 2. Cluster can grow without any real bounds. +- Failure: 1. If the master server is unreachable then the client can't + function. +- Limitations: 2. 
Limits are very large, effectively no limit. +- Encryption: 0. Docs make no mention of encryption. +- Permissions: 1. Afaict permissions are done by the OS on the fuse mount. +- Administration: 1. It seems that if the topology is stable there shouldn't be + much going on. +- Simplicity: 0. There are many moving pieces, as well as many different kinds of processes and entities. +- Visibility: 2. Lots of cli tooling, no prometheus metrics that I could find. + +TOTAL: 18 + +Overall MooseFS seems to me like a poor-developer's Ceph. It can do exactly the +same things, but with less of a community around it. The sales pitch and +feature-gating also don't ingratiate it to me. The most damning "feature" is the +master metadata server, which acts as a SPOF and only sort of supports +replication (but not failover, unless you get Pro). + +## Cutting Room Floor + +The following projects were intended to be reviewed, but didn't make the cut for +various reasons. + +* Tahoe-LAFS: The FUSE mount (which is actually an SFTP mount) doesn't support + mutable files. + +* HekaFS: Doesn't appear to exist anymore(?) + +* IPFS-cluster: Doesn't support sharding. + +* MinFS: Seems to only work off S3, no longer maintained anyway. + +* DRBD: Linux specific, no mac support. + +* BeeGFS: No mac support (I don't think? I couldn't find any indication it + supports macs at any rate). + +* NFS: No support for sharding the dataset. + +## Conclusions + +Going through the featuresets of all these different projects really helped me +focus in on how I actually expect this system to function, and a few things +stood out to me: + +* Perfect consistency is not a goal, and is ultimately harmful for this + use-case. The FS needs to propagate changes relatively quickly, but if two + different hosts are updating the same file it's not necessary to synchronize + those updates like a local filesystem would; just let one changeset clobber + the other and let the outer application deal with coordination. 
+ +* Permissions are extremely important, and yet for all these projects are + generally an afterthought. In a distributed setting we can't rely on the OS + user/groups of a host to permission read/write access. Instead that must be + done primarily via e2e encryption. + +* Transience is not something most of these projects expect, but is a hard + requirement of this use-case. In the long run we need something which can be + run on home hardware on home ISPs, which is not reliable at all. Hosts need to + be able to flit in and out of existence, and the cluster as a whole needs to + self-heal through that process. + +In the end, it may be necessary to roll our own project for this, as I don't +think any of the existing distributed file systems are suitable for what's +needed. + +[mobile_nebula]: https://github.com/cryptic-io/mobile_nebula +[nix]: https://nixos.org/manual/nix/stable/ diff --git a/static/src/_posts/2021-04-11-ripple-v2.md b/static/src/_posts/2021-04-11-ripple-v2.md new file mode 100644 index 0000000..cbde032 --- /dev/null +++ b/static/src/_posts/2021-04-11-ripple-v2.md @@ -0,0 +1,436 @@ +--- +title: >- + Ripple V2: A Better Game +description: >- + The sequel no one was waiting for! +tags: tech +series: ripple +--- + +

+ Movement: Arrow keys or WASD
+ Jump: Space
+ Goal: Jump as many times as possible without touching a ripple!
+
+ Press Jump To Begin! +

+ +_Who can make the muddy water clear?
+Let it be still, and it will gradually become clear._ + + +Your browser doesn't support canvas. At this point in the world that's actually +pretty cool, well done! + + +Score: + 0 + + + + +## Changelog + +There's been two major changes to the mechanics of the game since the previous +version: + +* A new ripple is created _only_ if there are no ripples on the field already, + or if the player has jumped over an existing ripple. + +* The score is increased only if a ripple is created, and is increased by the + number of bounces off the wall that ripple will have. Put another way, the + score is increased based on how high you jump. + +Other small changes include: + +* Ripple growth rate has been modified. It's now harder for a player to run into + the ripple they just created. + +* Ripple thickness indicates how many bounces are left in the ripple. This was + the case previously, but it's been made more obvious. + +* Small performance improvements. diff --git a/static/src/_posts/2021-04-22-composing-processes-into-a-static-binary-with-nix.md b/static/src/_posts/2021-04-22-composing-processes-into-a-static-binary-with-nix.md new file mode 100644 index 0000000..885d56b --- /dev/null +++ b/static/src/_posts/2021-04-22-composing-processes-into-a-static-binary-with-nix.md @@ -0,0 +1,248 @@ +--- +title: >- + Composing Processes Into a Static Binary With Nix +description: >- + Goodbye, docker-compose! +--- + +It's pretty frequent that one wants to use a project that requires multiple +processes running. For example, a small web api which uses some database to +store data in, or a networking utility which has some monitoring process which +can be run alongside it. + +In these cases it's extremely helpful to be able to compose these disparate +processes together into a single process. From the user's perspective it's much +nicer to only have to manage one process (even if it has hidden child +processes). 
From a dev's perspective the alternatives are: finding libraries in +the same language which do the disparate tasks and composing them into the same +process via import, or (if such libraries don't exist, which is likely) +rewriting the functionality of all processes into a new, monolithic project +which does everything; a huge waste of effort! + +## docker-compose + +A tool I've used before for process composition is +[docker-compose][docker-compose]. While it works well for composition, it +suffers from the same issues docker in general suffers from: annoying networking +quirks, a questionable security model, and the need to run the docker daemon. +While these issues are generally surmountable for a developer or sysadmin, they +are not suitable for a general-purpose project which will be shipped to average +users. + +## nix-bundle + +Enter [nix-bundle][nix-bundle]. This tool will take any [nix][nix] derivation +and construct a single static binary out of it, a la [AppImage][appimage]. +Combined with a process management tool like [circus][circus], nix-bundle +becomes a very useful tool for composing processes together! + +To demonstrate this, we'll be looking at putting together a project I wrote +called [markov][markov], a simple REST API for building [markov +chains][markov-chain] which is written in [go][golang] and backed by +[redis][redis]. + +## Step 1: Building Individual Components + +Step one is to get [markov][markov] and its dependencies into a state where it +can be run with [nix][nix]. 
Doing this is fairly simple, we merely use the +`buildGoModule` function: + +``` +pkgs.buildGoModule { + pname = "markov"; + version = "618b666484566de71f2d59114d011ff4621cf375"; + src = pkgs.fetchFromGitHub { + owner = "mediocregopher"; + repo = "markov"; + rev = "618b666484566de71f2d59114d011ff4621cf375"; + sha256 = "1sx9dr1q3vr3q8nyx3965x6259iyl85591vx815g1xacygv4i4fg"; + }; + vendorSha256 = "048wygrmv26fsnypsp6vxf89z3j0gs9f1w4i63khx7h134yxhbc6"; +} +``` + +This expression results in a derivation which places the markov binary at +`bin/markov`. + +The other component we need to run markov is [redis][redis], which conveniently +is already packaged in nixpkgs as `pkgs.redis`. + +## Step 2: Composing Using Circus + +[Circus][circus] can be configured to run multiple processes at the same time. +It will collect the stdout/stderr logs of these processes and combine them into +a single stream, or write them to log files. If any processes fail circus will +automatically restart them. It has a simple configuration and is, overall, a +great tool for a simple project like this. + +Circus also comes pre-packed in nixpkgs, so we don't need to do anything to +actually build it. We only need to configure it. To do this we'll write a bash +script which generates the configuration on-the-fly, and then runs the process +with that configuration. + +This script is going to act as the "frontend" for our eventual static binary; +the user will pass in configuration parameters to this script, and this script +will translate those into the appropriate configuration for all sub-processes +(markov, redis, circus). For this demo we won't go nuts with the configuration, +we'll just expose the following: + +* `MARKOV_LISTEN_ADDR`: Address REST API will listen on (defaults to + `localhost:8000`). + +* `MARKOV_TIMEOUT`: Expiration time of each link of the chain (defaults to 720 + hours). + +* `MARKOV_DATA_DIR`: Directory where data will be stored (defaults to current + working directory). 
+ +The bash script will take these params in as environment variables. The nix +expression to generate the bash script, which we'll call our entrypoint script, +will look like this (assumes that the expression to generate `bin/markov`, +defined above, is set to the `markov` variable): + +``` +pkgs.writeScriptBin "markov" '' + #!${pkgs.stdenv.shell} + + # On every run we create new, temporary, configuration files for redis and + # circus. To do this we create a new config directory. + markovCfgDir=$(${pkgs.coreutils}/bin/mktemp -d) + echo "generating configuration to $markovCfgDir" + + cat >$markovCfgDir/redis.conf <$markovCfgDir/circus.ini <- + Loops in Ginger +description: >- + Bringing it back around. +series: ginger +tags: tech +--- + +In previous posts in this series I went over the general idea of the ginger +programming language, and some of its properties. To recap: + +* Ginger is a programming language whose syntax defines a directed graph, in the + same way that a LISP language's syntax defines nested lists. + +* Graph edges indicate an operation, while nodes indicate a value. + +* The special values `in` and `out` are used when interpreting a graph as a + function. + +* A special node type, the tuple, is defined as being a node whose value is an + ordered set of input edges. + +* Another special node type, the fork, is the complement to the tuple. A fork is + defined as being a node whose value is an ordered set of output edges. + +* The special `if` operation accepts a 2-tuple, the first value being some state + value and the second being a tuple. The `if` operation expects to be directed + towards a 2-fork. If the boolean is true then the top output edge of the fork + is taken, otherwise the bottom is taken. The state value is what's passed to + the taken edge. + +There were some other detail rules but I don't remember them off the top of my +head. + +## Loops + +Today I'd like to go over my ideas for how loops would work in ginger. 
With +loops established ginger would officially be a Turing complete language and, +given time and energy, real work could actually begin on it. + +As with conditionals I'll start by establishing a base example. Let's say we'd +like to define an operation which prints out numbers from 0 up to `n`, where `n` +is given as an argument. In go this would look like: + +```go +func printRange(n int) { + for i := 0; i < n; i++ { + fmt.Println(i) + } +} +``` + +With that established, let's start looking at different patterns. + +## Goto + +In the olden days the primary looping construct was `goto`, which essentially +teleports the program counter (aka instruction pointer) to another place in the +execution stack. Pretty much any other looping construct can be derived from +`goto` and some kind of conditional, so it's a good starting place when +considering loops in ginger. + +``` +(in -println-> } -incr-> out) -> println-incr + +0 -> } -> } -if-> { -> out +in -> } -eq-> } { -> } -upd-> } -+ + ^ 0 -> } | + | println-incr -> } | + | | + +--------------------------------+ +``` + +(Note: the `upd` operation is used here for convenience. It takes in three +arguments: A tuple, an index, and an operation. It applies the operation to the +tuple element at the given index, and returns a new tuple with that index set to +the value returned.) + +Here `goto` is performed using a literal arrow going from the right to left. +It's ugly and hard to write, and would only be moreso the more possible gotos an +operation has. + +It also complicates our graphs in a significant way: up till now ginger graphs +have always been directed _acyclic_ graphs (DAGs), but by introducing this +construct we allow that graphs might be cyclic. It's not immediately clear to me +what the consequences of this will be, but I'm sure they will be great. 
If +nothing else it will make the compiler much more complex, as each value can no +longer be defined in terms of its input edge, as that edge might resolve back to +the value itself. + +While conceptually sound, I think this strategy fails the practicality test. We +can do better. + +## While + +The `while` construct is the basic looping primitive of iterative languages +(some call it `for`, but they're just lying to themselves). + +Try as I might, I can't come up with a way to make `while` work with ginger. +`while` ultimately relies on scoped variables being updated in place to +function, while ginger is based on the concept of pipelining a set of values +through a series of operations. From the point of view of the programmer these +operations are essentially immutable, so the requirement of a variable which can +be updated in place cannot be met. + +## Recur + +This pattern is based on how many functional languages, for example erlang, +handle looping. Rather than introducing new primitives around looping, these +languages instead ensure that tail calls are properly optimized and use those +instead. So loops are implemented as recursive function calls. + +For ginger to do this it would make sense to introduce a new special value, +`recur`, which could be used alongside `in` and `out` within operations. When +the execution path hits a `recur` then it gets teleported back to the `in` +value, with the input to `recur` now being the output from `in`. Usage of it +would look like: + +``` +( + + (in -println-> } -incr-> out) -> println-incr + + in -> } -if-> { -> out + in -eq-> } { -> } -upd-> } -> recur + 0 -> } + println-incr -> } + +) -> inner-op + +0 -> } -inner-op-> out +in -> } +``` + +This looks pretty similar to the `goto` example overall, but with the major +difference that the looping body had to be wrapped into an inner operation. 
The +reason for this is that the outer operation only takes in one argument, `n`, but +the loop actually needs two pieces of state to function: `n` and the current +value. So the inner operation loops over these two pieces of state, and the +outer operation supplies `n` and an initial iteration value (`0`) to that inner +operation. + +This seems cumbersome on the surface, but what other languages do (such as +erlang, which is the one I'm most familiar with) is to provide built-in macros +on top of this primitive which make it more pleasant to use. These include +function polymorphism and a more familiar `for` construct. With a decent macro +capability ginger could do the same. + +The benefits here are that the graphs remain acyclic, and the syntax has not +been made more cumbersome. It follows conventions established by other +languages, and ensures the language will be capable of tail-recursion. + +## Map/Reduce + +Another functional strategy which is useful is that of the map/reduce power +couple. The `map` operation takes a sequence of values and an operation, and +returns a sequence of the same length where the operation has been applied to +each value in the original sequence individually. The `reduce` operation is more +complicated (and not necessary for our example), but it's essentially a +mechanism to turn a sequence of values into a single value. + +For our example we only need `map`, plus one more helper operation: `range`. +`range` takes a number `n` and returns a sequence of numbers starting at `0` and +ending at `n-1`. Our print example now looks like: + +``` +in -range-> } -map-> out + println -> } +``` + +Very simple! Map/reduce is a well established pattern and is probably the +best way to construct functional programs. However, the question remains whether +these are the best _primitives_ for looping, and I don't believe they are. 
Both +`map` and `reduce` can be derived from conditional and looping primitives like +`if` and `recur`, and they can't do some things that those primitives can. While + + +I expect one of the first things which will be done in ginger is to define `map` +and `reduce` in terms of `if` and a looping primitive, and use them generously +throughout the code, I think the fact that they can be defined in terms of +lower-level primitives indicates that they aren't the right looping primitives +for ginger. + +## Conclusion + +Unlike with the conditionals posts, where I started out not really knowing what +I wanted to do with conditionals, I more or less knew where this post was going +from the beginning. `recur` is, in my mind, the best primitive for looping in +ginger. It provides the flexibility to be extended to any use-case, while not +complicating the structure of the language. While possibly cumbersome to +implement directly, `recur` can be used as a primitive to construct more +convenient looping operations like `map` and `reduce`. + +As a final treat (lucky you!), here's `map` defined using `if` and `recur`: + +``` +( + in -0-> mapped-seq + in -1-> orig-seq + in -2-> op + + mapped-seq -len-> i + + mapped-seq -> } -if-> { -> out + orig-seq -len-> } -eq-> } { -> } -append-> } -> recur + i -> } } } + } } + orig-seq -i-> } -op-> } } + } + orig-seq -> } + op -> } +) -> inner-map + + () -> } -inner-map-> out +in -0-> } +in -1-> } +``` + +The next step for ginger is going to be writing an actual implementation of the +graph structure in some other language (let's be honest, it'll be in go). After +that we'll need a syntax definition which can be used to encode/decode that +structure, and from there we can start actually implementing the language! 
diff --git a/static/src/_posts/2021-05-02-nfts.md b/static/src/_posts/2021-05-02-nfts.md new file mode 100644 index 0000000..a3871b1 --- /dev/null +++ b/static/src/_posts/2021-05-02-nfts.md @@ -0,0 +1,349 @@ +--- +title: >- + NFTs +description: >- + Some thoughts about. +tags: tech crypto +--- + +NFT stands for "non-fungible token". The "token" part refers to an NFT being a +token whose ownership is recorded on a blockchain. Pretty much all +cryptocurrencies, from bitcoin to your favorite shitcoin, could be called tokens +in this sense. Each token has exactly one owner, and ownership of the token can +be transferred from one wallet to another via a transaction on the blockchain. + +What sets an NFT apart from a cryptocurrency is the "non-fungible" part. +Cryptocurrency tokens are fungible; one bitcoin is the same as any other bitcoin +(according to the protocol, at least), in the same way as one US dollar holds as +much value as any other US dollar. Fungibility is the property of two units of +something being exactly interchangeable. + +NFTs are _not_ fungible. One is not the same as any other. Each has some piece +of data attached to it, and each is recorded separately on a blockchain as an +individual token. You can think of an NFT as a unique cryptocurrency which has a +supply of 1 and can't be divided. + +Depending on the protocol used to produce an NFT, the data attached to it might +be completely independent of its identity, even. It may be possible to produce +two NFTs with the exact same data attached to them (again, depending on the +protocol used), but even so those two NFTs will be independent and not +interchangeable. + +## FUD + +Before getting into why NFTs are interesting, I want to first address some +common criticism I see of them online (aka, in my twitter feed). The most +common, and unfortunately least legitimate, criticism has to do with the +environmental impact of NFTs. 
While the impact on energy usage and the +environment when talking about bitcoin is a topic worth going into, bitcoin +doesn't support hosting NFTs and therefore that topic is irrelevant here. + +Most NFTs are hosted on ethereum, which does have a comparable energy footprint +to bitcoin (it's somewhat less than half, according to the internet). _However_, +ethereum is taking actual, concrete steps towards changing its consensus +mechanism from proof-of-work (PoW) to proof-of-stake (PoS), which will cut the +energy usage of the network down to essentially nothing. The rollout plan for +Ethereum PoS covers the next couple of years, and after that we don't really +have to worry about the energy usage of NFTs any longer. + +The other big criticism I hear is about the value and nature of art and what the +impact of NFTs is in that area. I'm going to talk more about this in this post, +but, simply put, I don't think that the value and nature of art are immutable, +any more than the form of art is immutable. Perhaps NFTs _will_ change art, but +change isn't bad in itself, and furthermore I don't think they will actually +change it all that much. People will still produce art, it's only the +distribution mechanism that might change. + +## Real, Useful, Boring Things + +Most of the coverage around NFTs has to do with using them to represent +collectibles and art. I'd like to start by talking about other use-cases, those +where NFTs are actually "useful" (in the dull, practical sense). + +Each NFT can carry some piece of data along with it. This data can be anything, +but for a practical use-case it needs to be something which indicates ownership +of some internet good. It _cannot_ be the good itself. For example, an NFT which +contains an image does not really convey the ownership of that image; anyone can +copy the image data and own that image as well (intellectual property rights be +damned!). 
+ +A real use-case for NFTs which I'm already, if accidentally, taking advantage +of, is domain name registration. I am the proud owner of the +[mediocregopher.eth][ens] domain name (the `.eth` TLD is not yet in wide usage +in browsers, but one day!). The domain name's ownership is indicated by an NFT: +whoever holds that NFT, which I currently do, has the right to change all +information attached to the `mediocregopher.eth` domain. If I want to sell the +domain all I need to do is sell the NFT, which can be done via an ethereum +transaction. + +Domain names work well for NFTs because knowing the data attached to the NFT +doesn't actually do anything for you. It's the actual _ownership_ of the NFT +which unlocks value. And I think this is the key rule for where to look to apply +NFTs to practical use-cases: the ownership of the NFT has to unlock some +functionality, not the data attached to it. The functionality has to be digital +in nature, as well, as anything related to the physical world is not as easily +guaranteed. + +I haven't thought of many further practical use-cases of NFTs, but we're still +in early stages and I'm sure more will come up. In any case, the practical stuff +is boring, let's talk about art. + +[ens]: https://nfton.me/nft/0x57f1887a8bf19b14fc0df6fd9b2acc9af147ea85/7558304748055753202351203668187280010336475031529884349040105080320604507070 + +## Art, Memes, and All Wonderful Things + +For many the most baffling aspect of NFTs is their use as collectibles. Indeed, +their use as collectibles is their _primary_ use right now, even though these +collectibles procure no practical value for their owner; at best they are +speculative goods, small gambles, and at worst just a complete waste of money. +How can this be? + +The curmudgeons of the world would have you believe that money is only worth +spending on goods which offer practical value. 
If the good is neither consumable +in a way which meets a basic need, nor produces other goods of further value, +then it is worthless. Obviously NFTs fall into the "worthless" category. + +Unfortunately for them, the curmudgeons don't live in reality. People spend +their money on stupid, pointless shit all the time. I'm prepared to argue that +people almost exclusively spend their money on stupid, pointless shit. The +monetary value of a good has very little to do with its ability to meet a basic +necessity or its ability to produce value (whatever that even really means), and +more to do with how owning the shiny thing or doing the fun thing makes us +stupid monkeys very happy (for a time). + +Rather than bemoan NFTs, and our simple irrationality which makes them +desirable, let's embrace them as a new tool for expressing our irrationality to +the world, a tool which we have yet to fully explore. + +### A Moment Captured + +It's 1857 and Jean-François Millet reveals to the world what would become one of +his best known works: _The Gleaners_. + +{% include image.html dir="nfts" file="gleaners.jpg" width=5354 %} + +The painting depicts three peasants gleaning a field, the bulk of their harvest +already stacked high in the background. The [wikipedia entry][gleaners] has this +to say about the painting's eventual final sale: + +> In 1889, the painting, then owned by banker Ferdinand Bischoffsheim, sold for +> 300,000 francs at auction. The buyer remained anonymous, but rumours were +> that the painting was coveted by an American buyer. It was announced less than +> a week later that Champagne maker Jeanne-Alexandrine Louise Pommery had +> acquired the piece, which silenced gossip on her supposed financial issues +> after leaving her grapes on the vines weeks longer than her competitors. + +I think we can all breathe a sigh of relief for Jeanne-Alexandrine. 
+ +I'd like to talk about _why_ this painting was worth 300k francs, and really +what makes art valuable at all (aside from the money laundering and tax evasion +that high-value art enables). Millet didn't merely take a picture using paints +and canvas, an exact replica of what his eyes could see. It's doubtful this +scene ever played out in reality, exactly as depicted, at all! It existed only +within Millet himself. + +In _The Gleaners_ Millet captured far more than an image: the image itself +conveys the struggle of a humble life, the joy of the harvest, the history of +the french peasantry (and therefore the other french societal classes as well), +the vastness of the world compared to our little selves, and surely many other +things, each dependent on the viewer. The image conveys emotions, and most +importantly it conveys emotions captured at a particular moment, a moment which +no longer exists and will never exist again. The capturing of such a moment by +an artist capable of doing it some justice, so others can experience it to any +significant degree far into the future, is a rare event. + +Access to that rare moment is what is being purchased for 300k francs. We refer +to the painting as the "original", but really the painting is only the +first-hand reproduction of the moment, which is the true original, and proximity +to the true original is what is being purchased. All other reproductions must be +based on this first-hand one (be they photographs or painted copies), and are +therefore second and third-hand. + +Consider the value of a concert ticket; it is based on both how much in demand +the performance is, how close to the performance the seating section is, and how +many seats in that section there are. When one purchases the "original" _The +Gleaners_, one is purchasing a front-row ticket to a world-class performance at +a venue with only one seat. That is why it was worth 300k francs. 
+ +I have one final thing to say here and then I'll move onto the topic at hand: +the history of the work compounds its value as well. _The Gleaners_ conveys an +emotion, but knowing the critical reaction of the french elite at its first +unveiling can add to that emotion. + +Again, from the [wiki entry][gleaners]: + +> Millet's The Gleaners was also not perceived well due to its large size, 33 +> inches by 44 inches, or 84 by 112 centimetres. This was large for a painting +> depicting labor. Normally this size of a canvas was reserved for religious or +> mythological style paintings. Millet's work did not depict anything +> religiously affiliated, nor was there any reference to any mythological +> beliefs. The painting illustrated a realistic view of poverty and the working +> class. One critic commented that "his three gleaners have gigantic +> pretensions, they pose as the Three Fates of Poverty...their ugliness and +> their grossness unrelieved." + +Now scroll back up and see if you don't now have more affinity for the painting +than before you knew that. If so, then the face value just went up, just a +little bit. + +[gleaners]: https://en.wikipedia.org/wiki/The_Gleaners + +### The Value of an NFT + +With this acknowledgement of _why_ people desire art, we can understand why they +would want an NFT depicting an artwork. + +A few days ago an NFT of this image sold for almost $500k: + +{% include image.html dir="nfts" file="disaster-girl.jpg" width=2560 %} + +Most of the internet knows this image as _Disaster Girl_, a meme which has been +around since time immemorial (from the internet's perspective, anyway, in +reality it was taken in 2007). The moment captured is funny, the girl in the +image smiling as if she had set the fire which blazes in the background. But, as +with _The Gleaners_, the image itself isn't everything. 
The countless usages of +the image, the original and all of its remixes, all passed around as memes on +the internet for the past 14 years, have all worked to add to the image's +demand. _Disaster Girl_ is no longer just a funny picture or a versatile meme +format, it's a piece of human history and nostalgia. + +Unlike physical paintings, however, internet memes are eminently copyable. If +they weren't they could hardly function as memes! We can only have one +"original" _The Gleaners_, but anyone with a computer can have an exact, perfect +copy of the original _Disaster Girl_, such that there's no true original. But if +I were to put up an NFT of _Disaster Girl_ for sale, I wouldn't get a damned +penny for it (probably). Why was that version apparently worth $500k? + +The reason is that the seller is the girl in the image herself, now 21 years old +and in college. I have no particular connection to _Disaster Girl_, so buying an +NFT from me would be like buying a print of _The Gleaners_ off some rando in the +street; just a shallow copy, worth only the material it's printed on plus some +labor, and nothing more. But when Disaster Girl herself sells the NFT, then the +buyer is actually part of the moment, they are entering themselves into the +history of this meme that the whole world has taken a part in for the last 14 +years! $500k isn't so unreasonable in that light. + +### Property on the Internet + +I don't make it a secret that I consider "intellectual property" to be a giant +fucking scam that the world has unfortunately bought into. Data, be it a +physical book or a digital file, is essentially free to copy, and so any price +placed on the copying or sharing of knowledge is purely artificial. But we don't +have an alternate mechanism for paying producers of knowledge and art, and so we +continue to treat data as property even though it bears absolutely no +resemblance to anything of the kind.
+ +Disaster Girl has not, to my knowledge, asserted any property rights on the +image of herself. Doing so in any real sense, beyond going after a handful of +high-value targets who might settle a lawsuit, is simply not a feasible option. +Instead, by selling an NFT, Disaster Girl has been compensated for her labor +(meager as it was) in a way which was proportional to its impact on the world, +all without the invocation of the law. A great success! + +Actually, the labor was performed by Disaster Girl's father, who took the +original image and sent it into a photo contest or something. What would have +happened if the NFT was sold in his name? I imagine that it would not have sold +for nearly as much. This makes sense to me, even if it does not make sense from +a purely economic point of view. Disaster Girl's father did the work in the +moment, but being a notable figure to the general public is its own kind of +labor, and it's likely that his daughter has borne the larger burden over time. +The same logic applies to why we pay our movie stars absurd amounts even while +the crew makes a "normal" wage. + +Should the father not then get compensated at all? I think he should, and I +think he could! If he were to produce an NFT of his own, of the exact same +image, it would also fetch a decent price. Probably not 6 figures, possibly not +even 4, but considering the actual contribution he made (taking a picture and +uploading it), I think the price would be fair. How many photographers get paid +anything at all for their off-hand pictures of family outings? + +And this is the point I'd like to make: an NFT's price, like in all art, is +proportional to the distance to the moment captured. The beauty is that this +distance is purely subjective; it is judged not by rules set down in law by +fallible lawyers, but instead by the public at large. It is, in essence, a +democratization of intellectual property disputes.
If multiple people claim to +have produced a single work, let them all produce an NFT, and the market will +decide what each of their work is worth. + +Will the market ever be wrong? Certainly. But will it distribute the worth more +incorrectly than our current system, where artists must sell their rights to a +large publisher in order to see a meager profit, while the publisher rakes in +the vastly larger share? I sincerely doubt it. + +### Content Creation + +Another interesting mechanism of NFTs is that some platforms (e.g. +[Rarible][rarible]) allow the seller to attach a royalty percentage to the NFT +being sold. When this is done it means the original seller will receive some +percentage of all future sales of that NFT. + +I think this opens some interesting possibilities for content creators. Normally +a content creator would need to sell ads or subscriptions in order to profit +from their content, but if they instead/in addition sell NFTs associated with +their content (e.g. one per episode of their youtube show) they can add another +revenue stream. As their show, or whatever, begins to take off, older NFTs +become more valuable, and the content creator can take advantage of that new +increased value via royalties set on the NFTs. + +There's some further interesting side-effects that come from using NFTs in this +way. If a creator releases a work, and a corresponding NFT for that work, their +incentive is no longer to gate access to that work (as it would be in our +current IP system) or burden the work with advertisements and pleas for +subscriptions/donations. There's an entirely new goalpost for the creator: +actual value to others. + +The value of the NFT is based entirely and arbitrarily on others' feelings +towards the original work, and so it is in the creator's interest to increase +the visibility and virality of the work.
We can expect a creator who has sold an +NFT for a work, with royalties attached, to actively ensure there is as +little gatekeeping around the work as possible, and to create work which is +completely platform-agnostic and available absolutely everywhere. Releasing a +work as public-domain could even become a norm, should NFTs prove more +profitable than other revenue streams. + +### Shill Gang + +While the content creator's relationship with their platform(s) will change +drastically, I also expect that their relationship with their fans, or really +their fans' relationship with the creator's work, will change even more. Fans +are no longer passive viewers, they can have an actual investment in a work's +success. Where fans currently shill their favorite show or game or whatever out +of love, they can now also do it for personal profit. I think this is the worst +possible externality of NFTs I've encountered: internet fandom becoming orders +of magnitude more fierce and unbearable, as they relentlessly shill their +investments to the world at large. + +There is one good thing to come out of this new fan/content relationship though, +and that's the fans' role in distribution and preservation of work. Since fans +now have a financial incentive to see a work persist into the future, they will +take it upon themselves to ensure that the works won't accidentally fall off the +face of the internet (as things often do). This can be difficult currently since +work is often tied down with IP restrictions, but, as we've established, work +which uses NFTs for revenue is incentivized to _not_ tie itself down in any way, +so fans will have much more freedom in this respect. + +[rarible]: https://rarible.com/ + +### Art + +It seems unlikely to me that art will cease to be created, or cease to be +valuable. The human creative instinct comes prior to money, and we have always +created art regardless of economic concerns.
It's true that the nature of our +art changes according to economics (don't forget to hit that "Follow" button at +the top!), but if anything I think NFTs can change our art for the better. Our +work can be more to the point, more accessible, and less encumbered by legal +bullshit. + +## Fin + +That crypto cat is out of the bag, at this point, and I doubt if there's +anything that can put it back. The world has never before had the tools that +cryptocurrency and related technologies (like NFTs) offer, and our lives will +surely change as new uses of these tools make themselves apparent. I've tried to +extrapolate some uses and changes that could come out of NFTs here, but I have +no doubt that I've missed or mistook some. + +It's my hope that this post has at least offered some food-for-thought related +to NFTs, beyond the endless hot takes and hype that can be found on social +media, and that the reader can now have a bigger picture view of NFTs and where +they might take us as a society, should we embrace them. diff --git a/static/src/_posts/2021-05-11-ripple-v3.md b/static/src/_posts/2021-05-11-ripple-v3.md new file mode 100644 index 0000000..396dab0 --- /dev/null +++ b/static/src/_posts/2021-05-11-ripple-v3.md @@ -0,0 +1,442 @@ +--- +title: >- + Ripple V3 +description: >- + We're getting there! +tags: tech +series: ripple +--- + +

+ Movement: Arrow keys or WASD
+ Jump: Space
+ Goal: Jump as many times as possible without touching a ripple!
+
+ Press Jump To Begin! +

+ +_Who can make the muddy water clear?
+Let it be still, and it will gradually become clear._ + + +Your browser doesn't support canvas. At this point in the world that's actually +pretty cool, well done! + + +Score: + 0 + + + + +## Changelog + +The previous version was too easy to break, even with the requirement of jumping +over a ripple to generate a new one and increase your score. This led to the +following major changes: + +* The game now incorporates asteroid/pacman mechanics. Rather than bouncing off + walls, the player and ripples will instead come out the opposite wall they + travel through. + +* Jump height no longer affects score or the "strength" of a ripple. diff --git a/static/src/_posts/2021-05-16-new-years-resolution-vibe-check.md b/static/src/_posts/2021-05-16-new-years-resolution-vibe-check.md new file mode 100644 index 0000000..965eac8 --- /dev/null +++ b/static/src/_posts/2021-05-16-new-years-resolution-vibe-check.md @@ -0,0 +1,62 @@ +--- +title: >- + New Year's Resolution Vibe Check +description: >- + The not-quite-halfway progress report. +--- + +It's been over five months since I started my New Year's resolution, where I +committed to writing 52 blog posts by the end of the year. This week I'm on the +first vacation I've been able to take since the pandemic started, and, for lack +of anything else to really write about, am doing an almost-halfway checkup on +the whole process. + +Almost immediately into the process I wished I'd set my sights a bit lower. One +post a week is a pretty intense pace, it turns out. If I were to reset the +parameters of the resolution I would probably halve the requirements, down to +26 posts in the year. One concern would be that I would be more likely to forget +to do the bi-weekly post, whereas with the current system it's coupled with my +normal work rhythm and so stays more top of mind. But I think I'd have a much +easier time (perhaps even twice as easy!), so it might balance out.
+ +My thought in the beginning was that I could write on Friday afternoons or +Monday mornings as a bookend to working, but what's generally happened is that I +write on weekends. During the week the energy to write something up just isn't +there; writing posts is a kind of work all on its own, and I can only bring +myself to do so much work everyday. + +Lately it's been particularly difficult to pump out the posts. Obviously a large +component of this is that I quickly picked all the low hanging fruit that were +on my mind when I started this resolution, but an unexpected culprit has also +appeared: seasons. When I started the resolution it was still winter, and during +the cold months it's a lot easier to stay inside and work on a computer. As the +weather warms it's been harder to find time though, in between working on the +garden and going out and doing things with friends. + +Figuring out what to write about is becoming more of a challenge as well +(obviously, given the topic of this post). Ideally I'd like to post about things +I'm _doing_, rather than just talking about some topic, and for the most part +I've mostly kept to that. Constantly posting about ideas I have or opinions I +hold isn't really contributing any real work, unless the ideas or opinions are +really groundbreaking (they're not). If, on the other hand, I use the posts as a +kind of background motivation to get up and do something useful, so I can write +about what I did, then at least progress has been made on _something_. + +The catch there is that I've now added an additional "thing" to do every week, +in addition to the weekly post, and, as previously covered, I just don't have +the time and energy for that. So some posts (ahem) are pretty much fluff, and I +barely have the energy for those! Yet another reason to wish I'd committed to 26 +in the year, I suppose. + +It hasn't been all added stress and strife though. 
Doing the posts _has_ caused +me to work on side projects more, and even better quite a few people I know have +given me really good feedback on what I've been doing, and some have even +started getting involved. So, in the sense of being a way to inform others about +the things I'm working on, the posts are a great success! And I've definitely +been more consistent about working on side projects this year. + +I'll wrap this up and continue with my vacation. Summary: blog is more extra +work than expected, it's maybe worth it, but it would be more worth it if I +halved my pace. I'm not _going_ to halve my pace, because that's not how +resolutions work. The end. + diff --git a/static/src/_posts/2021-05-26-viz-4.md b/static/src/_posts/2021-05-26-viz-4.md new file mode 100644 index 0000000..cd6054a --- /dev/null +++ b/static/src/_posts/2021-05-26-viz-4.md @@ -0,0 +1,213 @@ +--- +title: >- + Visualization 4 +description: >- + Birth, death, and colors. +series: viz +tags: tech art +--- + + + +This visualization is a conglomeration of ideas from all the previous ones. On +each tick up to 20 new pixels are generated. The color of each new pixel is +based on the average color of its neighbors, plus some random drift. + +Each pixel dies after a certain number of ticks, `N`. A pixel's life can be +extended by up to `8N` ticks, one for each neighbor it has which is still alive. +This mechanism accounts for the strange behavior which is seen when the +visualization first loads, but also allows for more coherent clusters of pixels +to hold together as time goes on. + +The asteroid rule is also in effect in this visualization, so the top row and +bottom row pixels are neighbors of each other, and similarly for the rightmost +and leftmost column pixels. 
+ + diff --git a/static/src/_posts/2021-05-28-viz-5.md b/static/src/_posts/2021-05-28-viz-5.md new file mode 100644 index 0000000..ea2f9e9 --- /dev/null +++ b/static/src/_posts/2021-05-28-viz-5.md @@ -0,0 +1,306 @@ +--- +title: >- + Visualization 5 +description: >- + Seeing double. +series: viz +tags: tech art +--- + + + + + +
+ + +
+ +
+
+
+
+
+
+
+
+ +This visualization combines two distinct layers, each of them borrowing their +behavior from [Visualization 4][viz4]. Neither layer has any effect on the +other, one is merely super-imposed on top of the other in the top canvas. You +can see each layer individually in the two lower canvases. + +Despite their not affecting each other, the code is set up so that each layer +_could_ be affected by the other. This will likely be explored more in a future +post. + +[viz4]: {% post_url 2021-05-26-viz-4 %} + + diff --git a/static/src/_posts/2021-06-07-adventures-in-defi.md b/static/src/_posts/2021-06-07-adventures-in-defi.md new file mode 100644 index 0000000..f5a5879 --- /dev/null +++ b/static/src/_posts/2021-06-07-adventures-in-defi.md @@ -0,0 +1,271 @@ +--- +title: >- + Adventures In DeFi +description: >- + There and Back Again, a Yield Farmer's Tale. +--- + +It's difficult to be remotely interested in crypto and avoid the world of +decentralized finance (DeFi). Somewhere between the explosion of new projects, +implausible APY percents, complex tokens schemes, new phrases like "yield +farming" and "impermanent loss", rug pulls, hacks, and astronomical ethereum +fees, you simply _must_ have heard of it, even in passing. + +In late November of 2020 I decided to jump in and see what would happen. I read +everything I could find, got as educated as I could, did some (but probably not +enough) math, and got to work. Almost immediately afterwards a giant bull +market hit, fees on ethereum shot up to the moon, and my little yield farming +DeFi ship was effectively out to sea. + +For the past 200 days I haven't been able to tweak or withdraw any of the DeFi +positions I made, for fear of incurring so many ethereum fees that any gains I +made would be essentially wiped out. But the bull market is finally at a rest, +fees are down, and I'm interested in what the results of my involuntary +long-term experiment were. 
Before getting to the results though, let's start at +the beginning. I'm going to walk you through all the steps I took, as well as my +decision making process (as flawed as it surely was) and risk assessments. + +## Step 1: The Base Positions + +My first step was to set aside some ETH and BTC for this experiment. I was (and +remain) confident that these assets would accrue in value, and so wanted to hold +onto them for a long period of time. But while holding onto those assets, why +not make a little interest on them by putting them to use? That's where DeFi +comes in. + +I started with 2.04 ETH and 0.04 BTC. The ETH existed as normal ETH on the +ethereum blockchain, while the 0.04 BTC I had to first convert to [renBTC][ren]. + +renBTC is an ethereum token whose value is pinned to the value of BTC. This is +accomplished via a decentralized locking mechanism, wherein real BTC is +transferred to a decentralized network of ren nodes, and they lock it such that +no individual node has access to the wallet holding the BTC. At the same time +that the BTC is locked, the nodes print and transfer a corresponding amount of +renBTC to a wallet specified in the BTC transaction. It's a very interesting +project, though the exact locking mechanism used was closed-source at the time I +used it, which concerned me somewhat. + +[ren]: https://renproject.io/ + +### Step 1.5: Collateralization + +In Step 2 I deposit my assets into liquidity pools. For my renBTC this was no +problem, but for my ETH it wasn't so simple. I'll explain what a liquidity pool +is in the next section, but for now all that needs to be known is that there are +no worthwhile liquidity pools between ETH and anything ostensibly pinned to ETH +(e.g. WETH). So I needed to first convert my ETH into an asset for which there +are worthwhile liquidity pools, while also not losing my ETH position. + +Enter [MakerDAO][makerdao].
MakerDAO runs a decentralized collateralization app, +wherein a user deposits assets into a contract and is granted an amount of DAI +tokens relative to the value of the deposited assets. The value of DAI tokens +is carefully managed via the variable fee structure of the MakerDAO app, such +that 1 DAI is, generally, equal to 1 USD. If the value of the collateralized +assets drops below a certain threshold the position is liquidated, meaning the +user keeps the DAI and MakerDAO keeps the assets. It's not dissimilar to taking +a loan out, using one's house as collateral, except that the collateral is ETH +and not a house. + +MakerDAO allows you to choose, within some bounds, how much DAI you withdraw on +your deposited collateral. The more DAI you withdraw, the higher your +liquidation threshold, and if your assets fall in value and hit that threshold +you lose them, so a higher threshold entails more risk. In this way the user has +some say over how risky of a position they want to take out. + +In my case I took out a total of 500 DAI on my 2.04 ETH. Even at the time this +was somewhat conservative, but now that the price of ETH has 5x'd it's almost +comical. In any case, I now had 500 DAI to work with, and could move on to the +next step. + +[makerdao]: https://makerdao.com/ + +## Step 2: Liquidity Pools + +My assets were ready to get put to work, and the work they got put to was in +liquidity pools (LPs). The function of an LP is to facilitate the exchange of +one asset for another between users. They play the same role as a centralized +exchange like Kraken or Binance, but are able to operate on decentralized chains +by using a different exchange mechanism. + +I won't go into the details of how LPs work here, as it's not super pertinent. +There's great explainers, like [this one][lp], that are easy to find.
Suffice it +to say that each LP operates on a set of assets that it allows users to convert +between, and LP providers can deposit one or more of those assets into the pool +in order to earn fees on each conversion. + +When you deposit an asset into an LP you receive back a corresponding amount of +tokens representing your position in that LP. Each LP has its own token, and +each token represents a share of the pool that the provider owns. The value +of each token goes up over time as fees are collected, and so acts as the +mechanism by which the provider ultimately collects their yield. + +In addition to the yield one gets from users making conversions via the LP, LP +providers are often also further incentivized by being granted governance tokens +in the LPs they provide for, which they can then turn around and sell directly +or hold onto as an investment. These are usually granted via a staking +mechanism, where the LP provider stakes (or "locks") their LP tokens into the +platform, and is able to withdraw the incentive token based on how long and how +much they've staked. + +Some LP projects, such as [Sushi][sushi], have gone further and completely +gamified the whole experience, and are the cause of the multi thousand percent +APYs that DeFi has become somewhat famous for. These projects are flashy, but I +couldn't find myself placing any trust in them. + +There is a risk in being an LP provider, and it's called ["impermanent +loss"][il]. This is another area where it's not worth going into super detail, +so I'll just say that impermanent loss occurs when the relative value of the +assets in the pool diverges significantly. For example, if you are a provider in +a BTC/USDC pool, and the value of BTC relative to USD either tanks or +skyrockets, you will have ended up losing money. + +I wanted to avoid impermanent loss, and so focused on pools where the assets +have little chance of diverging.
These would be pools where the assets are +ostensibly pinned in value, for example a pool between DAI and USDC, or between +renBTC and WBTC. These are called stable pools. By choosing such pools my only +risk was in one of the pooled assets suddenly losing all of its value due to a +flaw in its mechanism, for example if MakerDAO's smart contract were to be +hacked. Unfortunately, stable pools don't have as great yields as their volatile +counterparts, but given that this was all gravy on top of the appreciation of +the underlying ETH and BTC I didn't mind this as much. + +I chose the [Curve][curve] project as my LP project of choice. Curve focuses +mainly on stable pools, and provides decent yield percents in that area while +also being a relatively trusted and actively developed project. + +I made the following deposits into Curve: + +* 200 DAI into the [Y Pool][ypool], receiving back 188 LP tokens. +* 300 DAI into the [USDN Pool][usdnpool], receiving back 299 LP tokens. +* 0.04 renBTC into the [tBTC Pool][tbtcpool], receiving back 0.039 LP tokens. + +[lp]: https://finematics.com/liquidity-pools-explained/ +[il]: https://finematics.com/impermanent-loss-explained/ +[sushi]: https://www.sushi.com/ +[curve]: https://curve.fi +[ypool]: https://curve.fi/iearn +[usdnpool]: https://curve.fi/usdn +[tbtcpool]: https://curve.fi/tbtc + +## Step 3: Yield Farming + +At this point I could have taken the next step of staking my LP tokens into the +Curve platform, and periodically going in and reaping the incentive tokens that +doing so would earn me. I could then sell these tokens and re-invest the profits +back into the LP, and then stake the resulting LP tokens back into Curve, +resulting in a higher yield the next time I reap the incentives, ad nauseam +forever. + +This is a fine strategy, but it has two major drawbacks: + +* I don't have the time, nor the patience, to implement it. +* ETH transaction fees would make it completely impractical.
+ +Luckily, yield farming platforms exist. Rather than staking your LP tokens +yourself, you instead deposit them into a yield farming platform. The platform +aggregates everyone's LP tokens, stakes them, and automatically collects and +re-invests incentives in large batches. By using a yield farming platform, +small, humble yield farmers like myself can pool our resources together to take +advantage of scale we wouldn't normally have. + +Of course, yield farming adds yet another gamification layer to the whole +system, and complicates everything. You'll see what I mean in a moment. + +The yield farming platform I chose was [Harvest][harvest]. Overall +Harvest had the best advertised APYs (though those can obviously change on a +dime), a large number of farmed pools that gets updated regularly, as well as a +simple interface that I could sort of understand. The project is a _bit_ of a +mess, and there's probably better options now, but it was what I had at the +time. + +For each of the 3 kinds of LP tokens I had collected in Step 2 I deposited them +into the corresponding farming pool on Harvest. As with the LPs, for each +farming pool you deposit into you receive back a corresponding amount of farming +pool tokens which you can then stake back into Harvest. Based on how much you +stake into Harvest you can collect a certain amount of FARM tokens periodically, +which you can then sell, yada yada yada. It's farming all the way down. I didn't +bother much with this. + +[harvest]: https://harvest.finance + +## Step 4: Wait + +At this point the market picked up, ethereum transactions shot up from 20 to 200 +gwei, and I was no longer able to play with my DeFi money without incurring huge +losses. So I mostly forgot about it, and only now am coming back to it to see +the damage. 
+ +## Step 5: Reap What I've Sown + +It's 200 days later, fees are down again, and enough time has passed that I +could plausibly evaluate my strategy. I've gone through the trouble of undoing +all my positions in order to arrive back at my base assets, ETH and BTC. While +it's tempting to just keep the DeFi ship floating on, I think I need to redo it +in a way that I won't be paralyzed during the next market turn, and I'd like to +evaluate other chains besides ethereum. + +First, I've unrolled my Harvest positions, collecting the original LP tokens +back plus whatever yield the farming was able to generate. The results of that +step are: + +* 194 Y Pool tokens (originally 188). +* 336 USDN Pool tokens (originally 299). +* 0.0405 tBTC Pool tokens (originally 0.039). + +Second, I've burned those LP tokens to collect back the original assets from the +LPs, resulting in: + +* 215.83 DAI from the Y Pool (originally 200). +* 346.45 DAI from the USDN Pool (originally 300). +* 0.0405 renBTC from the tBTC Pool (originally 0.04). + +For a total DAI of 562.28. + +Finally, I've re-deposited the DAI back into MakerDAO to reclaim my original +ETH. I had originally withdrawn 500 DAI, but due to interest I now owed 511 +DAI. So after reclaiming my full 2.04 ETH I have ~51 DAI leftover. + +## Insane Profits + +Calculating actual APY for the BTC investment is straightforward: it came out to +about 4.20% APY. Not too bad, considering the position is fairly immune to price +movements. + +Calculating for ETH is a bit trickier, since in the end I ended up with the same +ETH as I started with (2.04) plus 51 DAI. If I were to purchase ETH with that +DAI now, it would get me ~0.02 further ETH. Not a whole heck of a lot. And that +doesn't even account for ethereum fees! I made 22 ethereum transactions +throughout this whole process, resulting in ~0.098 ETH spent on transaction +fees. + +So in the end, I lost 0.078 ETH, but gained 0.0005 BTC.
If I were to +convert the BTC gain to ETH now it would give me a net total profit of: + +**-0.071 ETH** + +A net loss, how fun! + +## Conclusions + +There were a lot of takeaways from this experiment: + +* ETH fees will get ya, even in the good times. I would need to be working with + at least an order of magnitude higher base position in order for this to work + out in my favor. + +* I should have put all my DAI in the Curve USDN pool, and not bothered with the + Y pool. It had almost double the percent return in the end. + +* Borrowing DAI on my ETH was fun, but it really cuts down on how much of my ETH + value I'm able to take advantage of. My BTC was able to be fully invested, + whereas at most half of my ETH value was. + +* If I have a large USD position I want to sit on, the USDN pool on its own is + not the worst place to park it. The APY on it was about 30%! + +I _will_ be trying this again, albeit with a bigger budget and more knowledge. I +want to check out other chains besides ethereum, so as to avoid the fees, as +well as other yield mechanisms besides LPs, and other yield farming platforms +besides Harvest. + +Until then! diff --git a/static/src/_posts/2021-06-23-viz-6.md b/static/src/_posts/2021-06-23-viz-6.md new file mode 100644 index 0000000..8262c2b --- /dev/null +++ b/static/src/_posts/2021-06-23-viz-6.md @@ -0,0 +1,402 @@ +--- +title: >- + Visualization 6 +description: >- + Eat your heart out, Conway! +series: viz +tags: tech art +--- + + + + + +
+ + +
+ +
+ +
+

Bottom Layer

+
+
+ + + +
+
+ +
+

Top Layer

+
+
+ + + + +
+
+ +
+ +This visualization is essentially the same as the previous, except that each +layer now operates with different parameters than the other, allowing each to +exhibit different behavior. + +Additionally, the top layer has been made to be responsive to the bottom, via a +new mechanism where the age of an element on the top layer can be extended based +on the number of bottom layer elements it neighbors. + +Finally, the UI now exposes the actual parameters which are used to tweak the +behavior of each layer. Modifying any parameter will change the behavior of the +associated layer in real-time. The default parameters have been chosen such that +the top layer is now rather dependent on the bottom for sustenance, although it +can venture away to some extent. However, by playing with the parameters yourself you +can find other behaviors and interesting cause-and-effects that aren't +immediately obvious. Try it! + +An explanation of the parameters is as follows: + +On each tick, up to `maxNewElements` are created in each layer, where each new +element neighbors an existing one. + +Additionally, on each tick, _all_ elements in a layer are iterated through. Each +one's age is determined as follows: + +``` +age = (currentTick - birthTick) +age -= (numNeighbors * neighborBonusScalar) +age -= (numBottomLayerNeighbors * layerBonusScalar) // only for top layer +``` + +If an element's age is greater than or equal to the `ageOfDeath` for that layer, +then the element is removed. + + diff --git a/static/src/_posts/2021-06-26-selfhosted-email-with-maddy.md b/static/src/_posts/2021-06-26-selfhosted-email-with-maddy.md new file mode 100644 index 0000000..0ea3491 --- /dev/null +++ b/static/src/_posts/2021-06-26-selfhosted-email-with-maddy.md @@ -0,0 +1,277 @@ +--- +title: >- + Self-Hosted Email With maddy: A Naive First Attempt +description: >- + How hard could it be? +tags: tech +series: selfhost +--- + +For a _long_ time now I've wanted to get off gmail and host my own email +domains.
I've looked into it a few times, but have been discouraged on multiple +fronts: + +* Understanding the protocols underlying email isn't straightforward; it's an + old system, there's a lot of cruft, lots of auxiliary protocols that are now + essentially required, and a lot of different services required to tape it all + together. + +* The services which are required are themselves old, and use operational + patterns that maybe used to make sense but are now pretty freaking cumbersome. + For example, postfix requires something like 3 different system accounts. + +* Deviating from the standard route and using something like + [Mail-in-a-box][miab] involves running docker, which I'm trying to avoid. + +So up till now I had let the idea sit, waiting for something better to come +along. + +[maddy][maddy] is, I think, something better. According to the homepage +"\[maddy\] replaces Postfix, Dovecot, OpenDKIM, OpenSPF, OpenDMARC and more with +one daemon with uniform configuration and minimal maintenance cost." Sounds +perfect! The homepage is clean and to the point, it's written in go, and the +docs appear to be reasonably well written. And, to top it all off, it's already +been added to [nixpkgs][nixpkgs]! + +So in this post (and subsequent posts) I'll be documenting my journey into +getting a maddy server running to see how well it works out. + +## Just Do It + +I'm almost 100% sure this won't work, but to start with I'm going to simply get +maddy up and running on my home media server as per the tutorial on its site, +and go from there. + +First there's some global system configuration I need to perform. Ideally maddy +could be completely packaged up and not pollute the rest of the system at all, +and if I was using NixOS I think that would be possible, but as it is I need to +create a user for maddy and ensure it's able to read the TLS certificates that I +manage via [LetsEncrypt][le]. 
+ +```bash +sudo useradd -mrU -s /sbin/nologin -d /var/lib/maddy -c "maddy mail server" maddy +sudo setfacl -R -m u:maddy:rX /etc/letsencrypt/{live,archive} +``` + +The next step is to set up the nix build of the systemd service file. This is a +strategy I've been using recently to nix-ify my services without needing to deal +with nix profiles. The idea is to encode the nix store path to everything +directly into the systemd service file, and install that file normally. In this +case this looks something like: + +``` +pkgs.writeTextFile { + name = "mediocregopher-maddy-service"; + text = '' + [Unit] + Description=mediocregopher maddy + Documentation=man:maddy(1) + Documentation=man:maddy.conf(5) + Documentation=https://maddy.email + After=network.target + + [Service] + Type=notify + NotifyAccess=main + Restart=always + RestartSec=1s + + User=maddy + Group=maddy + + # cd to state directory to make sure any relative paths + # in config will be relative to it unless handled specially. + WorkingDirectory=/mnt/vol1/maddy + ReadWritePaths=/mnt/vol1/maddy + + # ... lots of directives from + # https://github.com/foxcpp/maddy/blob/master/dist/systemd/maddy.service + # that we'll elide here ... + + ExecStart=${pkgs.maddy}/bin/maddy -config ${./maddy.conf} + + ExecReload=/bin/kill -USR1 $MAINPID + ExecReload=/bin/kill -USR2 $MAINPID + + [Install] + WantedBy=multi-user.target + ''; +} +``` + +With the service now testable, it falls on me to actually go through the setup +steps described in the [tutorial][tutorial]. + +## Following The Tutorial + +The first step in the tutorial is setting up of domain names, which I first +perform in cloudflare (where my DNS is hosted) and then reflect into the conf +file. 
Then I point the `tls file` configuration line at my LetsEncrypt +directory by changing the line to: + +``` +tls file /etc/letsencrypt/live/$(hostname)/fullchain.pem /etc/letsencrypt/live/$(hostname)/privkey.pem +``` + + +maddy can access these files thanks to the `setfacl` command I performed +earlier. + +At this point the server should be effectively configured. However, starting it +via systemd results in this error: + +``` +failed to load /etc/letsencrypt/live/mx.mydomain.com/fullchain.pem and /etc/letsencrypt/live/mx.mydomain.com/privkey.pem +``` + +(For my own security I'm not going to be using the actual email domain in this +post, I'll use `mydomain.com` instead.) + +This makes sense... I use a wildcard domain with LetsEncrypt, so certs for the +`mx` sub-domain specifically won't exist. I need to figure out how to tell maddy +to use the wildcard, or actually create a separate certificate for the `mx` +sub-domain. I'd rather the former, obviously, as it's far less work. + +Luckily, making it use the wildcard isn't too hard, all that is needed is to +change the `tls file` line to: + +``` +tls file /etc/letsencrypt/live/$(primary_domain)/fullchain.pem /etc/letsencrypt/live/$(primary_domain)/privkey.pem +``` + +This works because my `primary_domain` domain is set to the top-level +(`mydomain.com`), which is what the wildcard cert is issued for. + +At this point maddy is up and running, but there's still a slight problem. maddy +appears to be placing all of its state files in `/var/lib/maddy`, even though +I'd like to place them in `/mnt/vol1/maddy`. I had set the `WorkingDirectory` in +the systemd service file to this, but apparently that's not enough. After +digging through the codebase I discover an undocumented directive which can be +added to the conf file: + +``` +state_dir /mnt/vol1/maddy +``` + +Kind of annoying, but at least it works. + +The next step is to fiddle with DNS records some more. 
I add the SPF, DMARC and +DKIM records to cloudflare as described by the tutorial (what do these do? I +have no fuckin clue). + +I also need to set up MTA-STS (again, not really knowing what that is). The +tutorial says I need to make a file with certain contents available at the URL +`https://mta-sts.mydomain.com/.well-known/mta-sts.txt`. I love it when a protocol +has to give up and resort to another one in order to keep itself afloat, it +really inspires confidence. + +Anyway, I set that subdomain up in cloudflare, and add the following to my nginx +configuration: + +``` +server { + listen 80; + server_name mta-sts.mydomain.com; + include include/public_whitelist.conf; + + location / { + return 404; + } + + location /.well-known/mta-sts.txt { + + # Check out openresty if you want to get super useful nginx plugins, like + # the echo module, out-of-the-box. + echo 'mode: enforce'; + echo 'max_age: 604800'; + echo 'mx: mx.mydomain.com'; + } +} +``` + +(Note: my `public_whitelist.conf` only allows cloudflare IPs to access this +sub-domain, which is something I do for all sub-domains which I can put through +cloudflare.) + +Finally, I need to create some actual credentials in maddy with which to send my +email. I do this via the `maddyctl` command-line utility: + +``` +> sudo maddyctl --config maddy.conf creds create 'me@mydomain.com' +Enter password for new user: +> sudo maddyctl --config maddy.conf imap-acct create 'me@mydomain.com' +``` + +## Send It! + +At this point I'm ready to actually test the email sending. I'm going to use +[S-nail][snail] to do so, and after reading through the docs there I put the +following in my `~/.mailrc`: + +``` +set v15-compat +set mta=smtp://me%40mydomain.com:password@localhost:587 smtp-use-starttls +``` + +And attempt the following `mailx` command to send an email from my new mail +server: + +``` +> echo 'Hello! 
This is a cool email' | mailx -s 'Subject' -r 'Me ' 'test.email@gmail.com' +reproducible_build: TLS certificate does not match: localhost:587 +/home/mediocregopher/dead.letter 10/313 +reproducible_build: ... message not sent +``` + +Damn. TLS is failing because I'm connecting over `localhost`, but maddy is +serving the TLS certs for `mydomain.com`. Since I haven't gone through the steps +of exposing maddy publicly yet (which would require port forwarding in my +router, as well as opening a port in iptables) I can't properly test this with +TLS not being required. _It's very important that I remember to re-require TLS +before putting anything public._ + +In the meantime I remove the `smtp-use-starttls` entry from my `~/.mailrc`, and +retry the `mailx` command. This time I get a different error: + +``` +reproducible_build: SMTP server: 523 5.7.10 TLS is required +``` + +It turns out there's a further configuration directive I need to add, this time +in `maddy.conf`. Within my `submission` configuration block I add the following +line: + +``` +insecure_auth true +``` + +This allows plaintext auth over non-TLS connections. Kind of sketchy, but again +I'll undo this before putting anything public. + +Finally, I try the `mailx` command one more time, and it successfully returns! + +Unfortunately, no email is ever received in my gmail :( I check the maddy logs +and see what I feared most all along: + +``` +Jun 29 08:44:58 maddy[127396]: remote: cannot use MX {"domain":"gmail.com","io_op":"dial","msg_id":"5c23d76a-60db30e7","reason":"dial tcp 142.250.152.26:25: connect: connection timed out","remote_addr":"142.250.152. +26:25","remote_server":"alt1.gmail-smtp-in.l.google.com.","smtp_code":450,"smtp_enchcode":"4.4.2","smtp_msg":"Network I/O error"} +``` + +My ISP is blocking outbound connections on port 25. 
This is classic email +bullshit; ISPs essentially can't allow outbound SMTP connections, as email is so +easily abusable it would drastically increase the amount of spam being sent from +their networks. + +## Lessons Learned + +The next attempt will involve an external VPS which allows SMTP, and a lot more +interesting configuration. But for now I'm forced to turn off maddy and let this +dream sit for a little while longer. + +[miab]: https://mailinabox.email/ +[maddy]: https://maddy.email +[nixpkgs]: https://search.nixos.org/packages?channel=21.05&from=0&size=50&sort=relevance&query=maddy +[tutorial]: https://maddy.email/tutorials/setting-up/ +[le]: https://letsencrypt.org/ +[snail]: https://wiki.archlinux.org/title/S-nail diff --git a/static/src/_posts/2021-07-01-viz-7.md b/static/src/_posts/2021-07-01-viz-7.md new file mode 100644 index 0000000..5bf3e8d --- /dev/null +++ b/static/src/_posts/2021-07-01-viz-7.md @@ -0,0 +1,440 @@ +--- +title: >- + Visualization 7 +description: >- + Feedback Loop. +series: viz +tags: tech art +--- + + + + + +
+ + +
+ +
+ +
+

Bottom Layer

+
+
+ + + + + + +
+
+ +
+

Top Layer

+
+
+ + + + + + +
+
+ +
+ +Once again, this visualization iterates upon the previous. In the last one the +top layer was able to "see" the bottom, and was therefore able to bolster or +penalize its own elements which were on or near bottom layer elements, but not +vice-versa. This time both layers can see each other, and the "Layer Neighbor +Scalar" can be used to adjust lifetime of elements which are on/near elements of +the neighboring layer. + +By default, the bottom layer has a high affinity to the top, and the top layer +has some (but not as much) affinity in return. + +Another addition is the "likeness" scalar. Likeness is defined as the degree to +which one element is like another. In this visualization likeness is determined +by color. The "Layer Neighbor Likeness Scalar" adjusts the lifetime of elements +based on how like they are to nearby elements on the neighboring layer. + +By default, the top layer has a high affinity for the bottom's color, but the +bottom doesn't care about the top's color at all (and so its color will drift +aimlessly). + +And finally "Color Drift" can be used to adjust the degree to which the color of +new elements can diverge from its parents. This has always been hardcoded, but +can now be adjusted separately across the different layers. + +In the default configuration the top layer will (eventually) converge to roughly +match the bottom both in shape and color. When I first implemented the likeness +scaling I thought it was broken, because the top would never converge to the +bottom's color. + +What I eventually realized was that the top must have a higher color drift than +the bottom in order for it to do so, otherwise the top would always be playing +catchup. However, if the drift difference is _too_ high then the top layer +becomes chaos and also doesn't really follow the color of the bottom. A +difference of 10 (degrees out of 360) is seemingly enough. 
+ + diff --git a/static/src/_posts/2021-07-06-maddy-vps.md b/static/src/_posts/2021-07-06-maddy-vps.md new file mode 100644 index 0000000..93c76d2 --- /dev/null +++ b/static/src/_posts/2021-07-06-maddy-vps.md @@ -0,0 +1,115 @@ +--- +title: >- + Setting Up maddy On A VPS +description: >- + We have delivery! +tags: tech +series: selfhost +--- + +In the previous post I left off with being blocked by my ISP from sending +outbound emails on port 25, effectively forcing me to set up [maddy][maddy] on a +virtual private server (VPS) somewhere else. + +After some research I chose [Vultr][vultr] as my VPS of choice. They apparently +don't block you from sending outbound emails on port 25, and are in general +pretty cheap. I rented their smallest VPS server for $5/month, plus an +additional $3/month to reserve an IPv4 address (though I'm not sure I really +need that, I have dDNS set up at home and could easily get that working here as +well). + +## TLS + +The first major hurdle was getting TLS certs for `mydomain.com` (not the real +domain) onto my Vultr box. For the time being I've opted to effectively +copy-paste my local [LetsEncrypt][le] setup to Vultr, using certbot to +periodically update my records using DNS TXT challenges. + +The downside to this is that I now require my Cloudflare API key to be present +on the Vultr box, which effectively means that if the box ever gets owned +someone will have full access to all my DNS. For now I've locked down the box as +best as I can, and will look into changing the setup in the future. There's two +ways I could go about it: + +* SCP the certs from my local box to the remote every time they're renewed. This + would require setting up a new user on the remote box with very narrow + privileges. This isn't the worst thing though. + +* Use a different challenge method than DNS TXT records. + +But again, I'm trying to set up maddy, not LetsEncrypt, and so I needed to move +on. 
+ +## Deployment + +In the previous post I talked about how I'm using nix to generate a systemd +service file which encompasses all dependencies automatically, without needing +to install anything to the global system or my nix profile. + +Since that's already been set up, it's fairly trivial to use `nix-copy-closure` +to copy a service file, and _all_ of its dependencies (including configuration) +from my local box to the remote Vultr box. Simply: + +``` +nix-copy-closure -s +``` + +I whipped up some scripts around this so that I can run a single make target and +have it build the service (and all deps), do a `nix-copy-closure` to the remote +host, copy the service file into `/etc/systemd/system`, and restart the +service. + +## Changes + +For the most part the maddy deployment on the remote box is the same as on the +local one. Down the road I will likely change them both significantly, so that +the remote one only deals with SMTP (no need for IMAP) and the local one will +automatically forward all submitted messages to it. + +Once that's done, and the remote Vultr box is set up on my [nebula][nebula] +network, there won't be a need for the remote maddy to do any SMTP +authentication, since the submission endpoint can be made entirely private. + +For now, however, I've set up maddy on the remote box's public interface with +SMTP authentication enabled, to make testing easier. + +## Testing + +And now, to test it! I changed the SMTP credentials in my `~/.mailrc` file as +appropriate, and let a test email rip: + +``` +echo 'Hello! This is a cool email' | mailx -s 'Subject' -r 'Me ' 'test.email@gmail.com' +``` + +This would, ideally, send an email from my SMTP server (on my domain) to a test +gmail domain. 
Unfortunately, it did not do that, but instead maddy spit this out +in its log: + +> maddy[1547]: queue: delivery attempt failed {"msg_id":"330a1ed9","rcpt":"mediocregopher@gmail.com","reason":"[2001:19f0:5001:355a:5400:3ff:fe73:3d02] Our system has detected that\nthis message does not meet IPv6 sending guidelines regarding PTR\nrecords and authentication. Please review\n https://support.google.com/mail/?p=IPv6AuthError for more information\n. gn42si18496961ejc.717 - gsmtp","remote_server":"gmail-smtp-in.l.google.com.","smtp_code":550,"smtp_enchcode":"5.7.1","smtp_msg":"gmail-smtp-in.l.google.com. said: [2001:19f0:5001:355a:5400:3ff:fe73:3d02] Our system has detected that\nthis message does not meet IPv6 sending guidelines regarding PTR\nrecords and authentication. Please review\n https://support.google.com/mail/?p=IPv6AuthError for more information\n. gn42si18496961ejc.717 - gsmtp"} + +Luckily Vultr makes setting up PTR records for reverse DNS fairly easy. They +even allowed me to do it on my box's IPv6 address which I'm not paying to +reserve (though I'm not sure what the long-term risks of that are... can it +change?). + +Once done, I attempted to send my email again, and what do you know... + +![Success!](/assets/maddy-vps/success.png) + +Success! + +So now I can send emails. There are a few next steps from here: + +* Get the VPS on my nebula network and lock it down properly. + +* Fix the TLS cert situation. + +* Set up the remote maddy to forward submissions to my local maddy. + +* Use my sick new email! 
+ +[maddy]: https://maddy.email +[le]: https://letsencrypt.org/ +[vultr]: https://www.vultr.com/ +[nebula]: https://github.com/slackhq/nebula diff --git a/static/src/_posts/2021-07-14-how-to-secure-a-webapp.md b/static/src/_posts/2021-07-14-how-to-secure-a-webapp.md new file mode 100644 index 0000000..155068d --- /dev/null +++ b/static/src/_posts/2021-07-14-how-to-secure-a-webapp.md @@ -0,0 +1,315 @@ +--- +title: >- + How to Secure a Webapp +description: >- + Get ready to jump through some hoops. +tags: tech +--- + +In this post I will be documenting all security hoops that one must jump through +in order to consider their webapp secure. This list should not be considered +comprehensive, as I might have forgotten something or some new hoop might have +appeared since writing. + +For the context of this post a "webapp" will be considered to be an HTML/CSS/JS +website, loaded in a browser, with which users create and access accounts using +some set of credentials (probably username and password). In other words, most +popular websites today. This post will only cover those concerns which apply to +_all_ webapps of this nature, and so won't dive into any which might be incurred +by using one particular technology or another. + +Some of these hoops might seem redundant or optional. That may be the case. But +if you are building a website and are beholden to passing some third-party +security audit for any reason you'll likely find yourself being forced to +implement most, if not all, of these measures anyway. + +So without further ado, let's get started! + +## HTTPS + +At this point you have to use HTTPS, there's no excuse for not doing so. All +attempts to hit an HTTP endpoint should redirect to the equivalent HTTPS +endpoint, and you should be using [HSTS][hsts] to ensure that a browser is never +tricked into falling back to HTTP via some compromised DNS server. 
+ +[hsts]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security + +## Cookies + +Cookies are an old web technology, and have always been essentially broken. Each +cookie can have certain flags set on it which change their behavior, and some of +these flags are required at this point. + +### Secure + +If you're storing anything sensitive in a cookie (spoiler alert: you will be) +then you need to have the Secure flag set on it. This prevents the cookie from +being sent in a non-HTTPS request. + +### HTTPOnly + +The HTTPOnly flag protects a cookie from XSS attacks by preventing it from being +accessible from javascript. Any cookie which is storing sensitive information +_must_ have this flag set. In the **Authentication** section we will cover the +storage of session tokens, but the TLDR is that they have to be stored in an +HTTPOnly cookie. + +Practically, this means that your sessions architecture _must_ account for the +fact that the webapp itself will not have direct access to its persistent +session token(s), and therefore must have some other way of knowing that it's +logged in (e.g. a secondary, non-HTTPOnly cookie which contains no secrets but +only signals that the browser is logged in). + +### SameSite + +The SameSite attribute can be set to `Strict`, `Lax`, or `None`. `Lax` is the +default in modern browsers and is sufficient for most security concerns, but if +you can go with `Strict` that would be better. The downside of `Strict` is that +cookies won't be sent on initial page-load of a site. + +In any case, even though `Lax` is the default you should still set this +attribute manually (or your auditor might get to add another bullet point to +their report). + +## Authentication + +Authentication is obviously one of the juiciest targets for an attacker. 
It's +one thing to be able to trick a user into performing this or that action, but if +one can just log in _as_ the user then they essentially have free rein over all +their information. + +### Password History + +Most websites use a username/password system as the first step of login. This +is.... fine. We've accepted it, at any rate. But there's a couple of hoops which +must be jumped through as a result of it, and the first is password history. + +I hope it goes without saying that one should be using a hashing algorithm like +bcrypt to store user passwords. But what is often not said is that, for each +user, you need to store the hashes of their last N passwords (where N is +something like 8). This way if they attempt to re-use an old password they are +not able to do so. The users must be protected from themselves, after all. + +### Credential Stuffing/Account Enumeration + +A credential stuffing attack is one where credentials are stolen from one +website and then attempted to be used on another, in the hope that users have +re-used their username/password across multiple sites. When they occur it'll +often look like a botnet spamming the authentication endpoint with tons of +different credentials. + +Account enumeration is a similar attack: it's where an attacker finds a way to +get the webapp to tell them whether or not an account email/username exists in +the system, without needing to have the right password. This is often done by +analyzing the error messages returned from login or a similar endpoint (e.g. +"Sorry this username is taken"). They then run through all possible values for +that endpoint to try and enumerate which users actually exist in the system. + +Account enumeration is tricky because often those errors are extremely helpful, +and we'd _like_ to keep them if we can. + +I've bucketed both of these attacks in the same section because they have a +similar solution: proof-of-work. 
The idea is that, for each request to some +sensitive endpoint, the client must send some proof that they've done an +intensive CPU computation. + +Compared to IP-based rate-limiting, PoW is much more effective against botnets +(which have a limitless set of IPs from which to spam you), while also being +much less intrusive on your real users than a captcha. + +PoW stymies botnets because they are generally being hosted by low-power, +compromised machines. In addition the systems that run these botnets are pretty +shallow in capability, because it's more lucrative to rent the botnet out than +to actually use it yourself, so it's rare for a botnet operator to go to the +trouble of implementing your PoW algorithm in the first place. + +So stick a PoW requirement on any login or account creation endpoint, or any +other endpoint which might be used to enumerate accounts in the system. You can +even make the PoW difficulty rise in relation to number of recent attempts on +these endpoints, if you're feeling spry. + +### MFA + +All the PoW checks in the world won't help your poor user who isn't using a +different username/password for each website, and who got unlucky enough to have +those credentials leaked in a hack of a completely separate site than your own. +They also won't help your user if they _are_ using different username/passwords +for everything, but their machine gets straight up stolen IRL and the attacker +gets access to their credential storage. + +What _will_ help them in these cases, however, is if your site supports +multi-factor authentication, such as [TOTP][totp]. If it does then your user +will have a further line of defense in the form of another password which +changes every 30 seconds, and which can only be accessed from a secondary device +(like their phone). If your site claims to care about the security of your +user's account then MFA is an absolute requirement. + +It should be noted, however, that not all MFA is created equal. 
A TOTP system +is great, but a one-time code being sent over SMS or email is totally different +and not nearly as great. SMS is vulnerable to [SIM jacking][sim], which can be +easily used in a targeted attack against one of your users. One-time codes over +email are pointless for MFA, as most people have their email logged in on their +machine all the time, so if someone steals your user's machine they're still +screwed. + +In summary: MFA is essentially required, _especially_ if the user's account is +linked to anything valuable, and must be done with real MFA systems like TOTP, +not SMS or email. + +[totp]: https://www.twilio.com/docs/glossary/totp +[sim]: https://www.vice.com/en/article/3kx4ej/sim-jacking-mobile-phone-fraud + +### Login Notifications + +Whenever a user successfully logs into their account you should send them email +(or some other notification) letting them know it happened. This way if it +wasn't actually them who did so, but an attacker, they can perhaps act quickly +to lock down their account and prevent any further harm. The login notification +email should have some kind of link in it which can be used to immediately lock +the account. + +### Token Storage + +Once your user has logged into your webapp, it's up to you, the developer, to +store their session token(s) somewhere. The question is... where? Well this +one's easy, because there's only one right answer: HTTPOnly cookies (as alluded +to earlier). + +When storing session tokens you want to guard against XSS attacks which might +grab the tokens and send them to an attacker, allowing that attacker to hijack +the session and pose as the user. This means the following are not suitable +places to store the tokens: + +* Local storage. +* `window`, or anything which can be accessed via `window`. +* Non-HTTPOnly cookies. + +Any of these are trivial to find by a script running in the browser. 
If a +session token is ephemeral then it may be stored in a "normal" javascript +variable somewhere _as long as_ that variable isn't accessible from a global +context. But for any tokens which need to be persisted across browser restarts +an HTTPOnly cookie is your only option. + +## Cross-Site + +Speaking of XSS attacks, we have some more mitigation coming up... + +### CSP + +Setting a [CSP][csp] for your website is key to preventing XSS. A CSP allows you +to more tightly control the allowed origins of the various entities on your site +(be they scripts, styles, images, etc...). If an entity of unexpected origin +shows up it is disallowed. + +Be sure to avoid any usages of the policies labeled "unsafe" (go figure), +otherwise the CSP is rendered somewhat pointless. Also, when using hostname +based allowlisting try to be as narrow as you can in your allowlist, and +especially only include https hosts. If you can you should opt for the `nonce` +or `sha` policies. + +[csp]: https://developer.mozilla.org/en-US/docs/Web/HTTP/CSP + +### SVG + +A small but important note: if you're website allows users to upload images, +then be _very_ careful about allowing users to upload SVGs. SVGs are actually +XML documents, and even worse than that they allow `