• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

lyokha/nginx-haskell-module: Nginx module for binding Haskell code in configurat ...

原作者: [db:作者] 来自: 网络 收藏 邀请

开源软件名称(OpenSource Name):

lyokha/nginx-haskell-module

开源软件地址(OpenSource Url):

https://github.com/lyokha/nginx-haskell-module

开源编程语言(OpenSource Language):

C 74.1%

开源软件介绍(OpenSource Introduction):

Build Status Hackage Hackage Docker Read the Docs Doc

This Nginx module allows compiling and running Haskell source code found in a configuration file or an existing shared library. It allows for writing in Haskell synchronous variable handlers, asynchronous tasks, services (i.e. asynchronous tasks that are not bound to requests), shared services (i.e. services that work exclusively on a single Nginx worker process all the time), content handlers and POST request handlers.

Table of contents

Motivational example

user                    nobody;
worker_processes        2;

events {
    worker_connections  1024;
}

http {
    default_type        application/octet-stream;
    sendfile            on;

    haskell ghc_extra_options
                -ignore-package regex-pcre
                -XFlexibleInstances -XMagicHash -XTupleSections;

    haskell compile standalone /tmp/ngx_haskell.hs '

import qualified Data.Char as C
import           Text.Regex.PCRE
import           Data.Aeson
import           Data.Maybe
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Char8 as C8L
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as C8
import           Data.ByteString.Unsafe
import           Data.ByteString.Internal (accursedUnutterablePerformIO)
import           Text.Pandoc
import           Text.Pandoc.Builder
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import           Data.Function (on)
import           Control.Monad
import           Control.Exception
import           System.IO.Unsafe (unsafePerformIO)
import           Safe

toUpper = map C.toUpper
NGX_EXPORT_S_S (toUpper)

takeN = take . readDef 0
NGX_EXPORT_S_SS (takeN)

NGX_EXPORT_S_S (reverse)

-- does not match when any of the 2 args is empty or not decodable
matches = (fromMaybe False .) . liftM2 (=~) `on` (doURLDecode =<<) . toMaybe
    where toMaybe [] = Nothing
          toMaybe a  = Just a
NGX_EXPORT_B_SS (matches)

firstNotEmpty = headDef "" . filter (not . null)
NGX_EXPORT_S_LS (firstNotEmpty)

isInList [] = False
isInList (x : xs) = x `elem` xs
NGX_EXPORT_B_LS (isInList)

jSONListOfInts :: B.ByteString -> Maybe [Int]
jSONListOfInts = (decode =<<) . doURLDecode . L.fromStrict

isJSONListOfInts = isJust . jSONListOfInts
NGX_EXPORT_B_Y (isJSONListOfInts)

jSONListOfIntsTakeN x = encode $ maybe [] (take n) $ jSONListOfInts y
    where (readDef 0 . C8.unpack -> n, B.tail -> y) = B.break (== 124) x
NGX_EXPORT_Y_Y (jSONListOfIntsTakeN)

class UrlDecodable a
    where doURLDecode :: a -> Maybe a

instance UrlDecodable String where
    -- adopted from
    -- http://www.rosettacode.org/wiki/URL_decoding#Haskell
    doURLDecode [] = Just []
    doURLDecode (\'%\' : xs) =
        case xs of
            (a : b : xss) ->
                (:) . C.chr <$> readMay (\'0\' : \'x\' : [a, b])
                            <*> doURLDecode xss
            _ -> Nothing
    doURLDecode (\'+\' : xs) = (\' \' :) <$> doURLDecode xs
    doURLDecode (x : xs) = (x :) <$> doURLDecode xs

instance UrlDecodable L.ByteString where
    -- adopted for ByteString arguments from
    -- http://www.rosettacode.org/wiki/URL_decoding#Haskell
    doURLDecode (L.null -> True) = Just L.empty
    doURLDecode (L.uncons -> Just (37, xs))
        | L.length xs > 1 =
            let (C8L.unpack -> c, xss) = L.splitAt 2 xs
            in L.cons <$> readMay (\'0\' : \'x\' : c)
                      <*> doURLDecode xss
        | otherwise = Nothing
    doURLDecode (L.uncons -> Just (43, xs)) = (32 `L.cons`) <$> doURLDecode xs
    doURLDecode (L.uncons -> Just (x, xs)) = (x `L.cons`) <$> doURLDecode xs

urlDecode = fromMaybe "" . doURLDecode
NGX_EXPORT_S_S (urlDecode)

-- compatible with Pandoc 2.8 (will not compile for older versions)
simpleHtmlTemplate = unsafePerformIO $ do
    t <- compileTemplate "" $ T.pack "<html>\\n<body>\\n$body$</body></html>"
    return $ case t of
                 Right a -> a
                 Left e -> error e
{-# NOINLINE simpleHtmlTemplate #-}

fromMd (T.decodeUtf8 -> x) = uncurry (, packLiteral 9 "text/html"#, , []) $
    case runPure $ readMarkdown def x >>= writeHtml of
        Right p -> (fromText p, 200)
        Left (T.pack . displayException -> e) ->
            (case runPure $ writeError e of
                 Right p -> fromText p
                 Left  _ -> fromText e, 500)
    where packLiteral l s =
              accursedUnutterablePerformIO $ unsafePackAddressLen l s
          fromText = C8L.fromStrict . T.encodeUtf8
          writeHtml = writeHtml5String htmlWriterOptions
          writeError = writeHtml . doc . para . singleton . Str
          htmlWriterOptions = def { writerTemplate = Just simpleHtmlTemplate }
NGX_EXPORT_HANDLER (fromMd)

toYesNo "0" = "No"
toYesNo "1" = "Yes"
toYesNo  _  = "Unknown"
NGX_EXPORT_S_S (toYesNo)

    ';

    server {
        listen       8010;
        server_name  main;
        error_log    /tmp/nginx-test-haskell-error.log;
        access_log   /tmp/nginx-test-haskell-access.log;

        location / {
            haskell_run toUpper $hs_a $arg_a;
            echo "toUpper ($arg_a) = $hs_a";
            if ($arg_b) {
                haskell_run takeN $hs_a $arg_b $arg_a;
                echo "takeN ($arg_a, $arg_b) = $hs_a";
                break;
            }
            if ($arg_c) {
                haskell_run reverse $hs_a $arg_c;
                echo "reverse ($arg_c) = $hs_a";
                break;
            }
            if ($arg_d) {
                haskell_run matches $hs_a $arg_d $arg_a;
                haskell_run urlDecode $hs_b $arg_a;
                echo "matches ($arg_d, $hs_b) = $hs_a";
                break;
            }
            if ($arg_e) {
                haskell_run firstNotEmpty $hs_a $arg_f $arg_g $arg_a;
                echo "firstNotEmpty ($arg_f, $arg_g, $arg_a) = $hs_a";
                break;
            }
            if ($arg_l) {
                haskell_run isInList $hs_a $arg_a secret1 secret2 secret3;
                echo "isInList ($arg_a, <secret words>) = $hs_a";
                break;
            }
            if ($arg_m) {
                haskell_run isJSONListOfInts $hs_a $arg_m;
                haskell_run urlDecode $hs_b $arg_m;
                echo "isJSONListOfInts ($hs_b) = $hs_a";
                break;
            }
            if ($arg_n) {
                haskell_run jSONListOfIntsTakeN $hs_a $arg_take|$arg_n;
                haskell_run urlDecode $hs_b $arg_n;
                echo "jSONListOfIntsTakeN ($hs_b, $arg_take) = $hs_a";
                break;
            }
        }

        location /content {
            haskell_run isJSONListOfInts $hs_a $arg_n;
            haskell_run toYesNo $hs_b $hs_a;
            haskell_run jSONListOfIntsTakeN $hs_c $arg_take|$arg_n;
            haskell_run urlDecode $hs_d $arg_n;
            haskell_content fromMd "
## Do some JSON parsing

### Given ``$hs_d``

* Is this list of integer numbers?

    + *$hs_b*

* Take $arg_take elements

    + *``$hs_c``*
    ";

        }
    }
}

Haskell source code is loaded with directives haskell compile or haskell load. Both directives accept an absolute path to a haskell source file as their first argument and a haskell source code as their second argument. The code is getting saved to the path and compiled to a shared library when nginx starts. The directives have a subtle distinction: haskell compile always requires the code argument and runs compiler unconditionally, whereas haskell load checks if the target library exists and does not compile source code in this case, thus eliminating necessity of the source code argument.

The module may load an arbitrary haskell code but only those functions are accessible from nginx that are exported with special macros NGX_EXPORT_S_S, NGX_EXPORT_S_SS, NGX_EXPORT_B_S and NGX_EXPORT_B_SS (here S_S, S_SS, B_S and B_SS stand for mnemonic types returns-String-accepts-String, returns-String-accepts-String-String, returns-Bool-accepts-String and returns-Bool-accepts-String-String), their list counterparts NGX_EXPORT_S_LS and NGX_EXPORT_B_LS (LS stands for List-of-Strings) and two macros that deal with bytestrings: NGX_EXPORT_Y_Y and NGX_EXPORT_B_Y (Y stands for bYte). For the sake of efficiency, bytestring macros accept strict but return (only Y_Y) lazy bytestrings. Effectively this means that only those functions are supported that return strings, bytestrings or booleans and accept one, two or more (only S_LS and B_LS) string arguments or one bytestring.

In this example 10 custom haskell functions are exported: toUpper, takeN, reverse (which is normal reverse imported from Prelude), matches (which requires module Text.Regex.PCRE), firstNotEmpty, isInList, isJSONListOfInts, jSONListOfIntsTakeN, urlDecode and toYesNo. In my case this code won't compile due to ambiguity involved by presence of the two installed packages regex-pcre and regex-pcre-builtin, so I had to add an extra ghc compilation flag -ignore-package regex-pcre using directive haskell ghc_extra_options. Other flags include -XFlexibleInstances which allows declaration of instance UrlDecodable String. Class UrlDecodable provides function doURLDecode for decoding strings and bytestrings that was adopted from here. The bytestring instance of doURLDecode makes use of view patterns in its clauses, however this extension does not have to be declared explicitly because it was already enabled in a pragma from the wrapping haskell code provided by this module (see details in section Wrapping haskell code organization). In several clauses of doURLDecode there are explicit characters wrapped inside single quotes which are in turn escaped with backslashes to not confuse nginx parser as the haskell code itself is wrapped inside single quotes. Exported function urlDecode is defined via the string instance of doURLDecode: if decoding fails it returns an empty string.

Let's look inside the server clause, in location / where the exported haskell functions are used. Directive haskell_run takes three or more arguments: it depends on the type of the exported function (S_S, S_SS etc.). The first argument of the directive is the name of an exported haskell function, the second argument is a custom variable where the function's return value will be stored, and the remaining (one or two) arguments are complex values (in the nginx notion: it means that they may contain arbitrary number of variables and plain symbols) that correspond to the arguments of the exported function. Directive haskell_run is allowed in server, location and location-if clauses. In this example all returned strings are stored in the same variable $hs_a which is not a good habit for nginx configuration files. I only wanted to show that upper nginx configuration levels being merged with lower levels behave as normally expected.

There is another haskell directive haskell_content which accepts a haskell function to generate HTTP response and an optional string that will be passed to the function. The function must be of one of the two types: strictByteString-to-lazyByteString and strictByteString-to-4tuple(lazyByteString,strictByteString,Int, list-of-pairs-of-strictByteStrings). It must be exported with NGX_EXPORT_DEF_HANDLER (default content handler) in the first case and NGX_EXPORT_HANDLER in the second case. The elements in the 4tuple correspond to returned content, its type (e.g. text/html etc.), HTTP status, and a list of custom response headers. Default content handler sets content type to text/plain and HTTP status to 200. Directive haskell_content is allowed in location and location-if clauses of the nginx configuration. In the location /content from the above example the directive haskell_content makes use of a haskell function fromMd to generate HTML response from a markdown text. Function fromMd translates a markdown text to HTML using Pandoc library. Notice that content type is built from a string literal with a magic hash at the end to avoid unnecessary expenses (see details about using string literals in section Optimized unsafe content handler).

What about doing some tests? Let's first start nginx (in this example, from the directory where file nginx.conf is located).

# nginx -c`pwd`/nginx.conf
[1 of 1] Compiling NgxHaskellUserRuntime ( /tmp/ngx_haskell.hs, /tmp/ngx_haskell.o )
Linking /tmp/ngx_haskell.so ...

Nginx compiles haskell code at its start. Had compilation failed and nginx would not have started (see details about starting nginx in section Reloading of haskell code and static content). In this case the code is OK and we are moving forward.

$ curl 'http://localhost:8010/?a=hello_world'
toUpper (hello_world) = HELLO_WORLD
$ curl 'http://localhost:8010/?a=hello_world&b=4'
takeN (hello_world, 4) = hell
$ curl 'http://localhost:8010/?a=hello_world&b=oops'
takeN (hello_world, oops) = 
$ curl 'http://localhost:8010/?c=intelligence'
reverse (intelligence) = ecnegilletni
$ curl 'http://localhost:8010/?d=intelligence&a=%5Ei'              # URL-encoded ^i
matches (intelligence, ^i) = 1
$ curl 'http://localhost:8010/?d=intelligence&a=%5EI'              # URL-encoded ^I
matches (intelligence, ^I) = 0
$ curl 'http://localhost:8010/?e=1&g=intelligence&a=smart'
firstNotEmpty (, intelligence, smart) = intelligence
$ curl 'http://localhost:8010/?e=1&g=intelligence&f=smart'
firstNotEmpty (smart, intelligence, ) = smart
$ curl 'http://localhost:8010/?e=1'
firstNotEmpty (, , ) = 
$ curl 'http://localhost:8010/?l=1'
isInList (, <secret words>) = 0
$ curl 'http://localhost:8010/?l=1&a=s'
isInList (s, <secret words>) = 0
$ curl 'http://localhost:8010/?l=1&a=secret2'
isInList (secret2, <secret words>) = 1
$ curl 'http://localhost:8010/?m=%5B1%2C2%2C3%5D'                  # URL-encoded [1,2,3]
isJSONListOfInts ([1,2,3]) = 1
$ curl 'http://localhost:8010/?m=unknown'
isJSONListOfInts (unknown) = 0
$ curl 'http://localhost:8010/?n=%5B10%2C20%2C30%2C40%5D&take=3'   # URL-encoded [10,20,30,40]
jSONListOfIntsTakeN ([10,20,30,40], 3) = [10,20,30]
$ curl 'http://localhost:8010/?n=%5B10%2C20%2C30%2C40%5D&take=undefined'
jSONListOfIntsTakeN ([10,20,30,40], undefined) = []

Let's try location /content (in a browser it looks great!)

$ curl -D- 'http://localhost:8010/content?n=%5B10%2C20%2C30%2C40%5D&take=3'
HTTP/1.1 200 OK
Server: nginx/1.16.0
Date: Wed, 27 Nov 2019 12:13:46 GMT
Content-Type: text/html
Content-Length: 277
Connection: keep-alive

<html>
<body>
<h2>Do some JSON parsing</h2>
<h3>Given <code>[10,20,30,40]</code></h3>
<ul>
<li><p>Is this list of integer numbers?</p>
<ul>
<li><em>Yes</em></li>
</ul></li>
<li><p>Take 3 elements</p>
<ul>
<li><em><code>[10,20,30]</code></em></li>
</ul></li>
</ul></body></html>

Static content in HTTP responses

Reading files in runtime inescapably drops nginx performance. Fortunately there is a haskell module Data.FileEmbed that makes it possible to embed files during ghc compilation time. Consider the following haskell content handler

fromFile (C8.unpack -> "content.html") =
    (L.fromStrict $(embedFile "/path/to/content.html"), "text/html", 200, [])
fromFile (C8.unpack -> "content.txt") =
    (L.fromStrict $(embedFile "/path/to/content.txt"), "text/plain", 200, [])
fromFile _ =
    (C8L.pack "File not found", "text/plain", 500, [])
NGX_EXPORT_HANDLER (fromFile)

(to make it compile another option -XTemplateHaskell must be added into the directive haskell ghc_extra_options and the module Data.FileEmbed must be imported too). Now with a new location

        location /static {
            haskell_static_content fromFile "content.html";
            if ($arg_a) {
                haskell_static_content fromFile "content.txt";
                break;
            }
        }

HTTP requests with URIs that start with /static will be responded with contents of files listed in the clauses of the function fromFile that have been embedded into the function during ghc compilation. Directive haskell_static_content runs its haskell handler and allocates response data only once in nginx worker's lifetime when the first request arrives and is processed in the location. On further requests these data are sent back without running the haskell handler. This makes directive haskell_static_content more optimal for returning static data comparing with haskell_content.

Directive haskell_static_content is useful not only for returning files but for any content that can be evaluated only once in nginx worker's lifetime.

Optimized unsafe content handler

Notice that starting from version 1.3 of this module, all content handlers do not pass copies to the C side! Instead, the underlying lazy bytestrings share their contents with nginx. So all the reasons about extra copying below are no longer actual.

Let's go back to the example from the previous section. All the content handlers we met so far receive a copy of data produced in haskell handlers. Using references to the original data would lead to nasty things after haskell's garbage collector wakeup, so the only safe choice seems to be copying the original data1. Handler fromFile from the example takes static data embedded into the haskell library by Data.FileEmbed, makes a copy of this and passes it to the C code. It runs once per location during location configuration lifetime thanks to the directive haskell_static_content implementation. Nonetheless there are two duplicate static data copies in the program during its run which looks wasteful. It can get even worse when using haskell_static_content is not an option.

Here is an example. Module Data.FileEmbed allows embedding all files in a directory recursively using template function embedDir. This make it possible to emulate nginx static files delivery feature. The following is a quick and dirty implementation.

Haskell content handler.

fromFile (tailSafe . C8.unpack -> f) =
    case lookup f $(embedDir "/rootpath") of
        Just p  -> (L.fromStrict p,            "text/plain", 200, [])
        Nothing -> (C8L.pack "File not found", "text/plain", 404, [])
NGX_EXPORT_HANDLER (fromFile)

Corresponding nginx location.

        location /static {
            haskell_content fromFile $uri;
        }

In this example the files are expected in the directory /rootpath/static. As soon as the target file is parameterized by the value of the $uri, the directive haskell_content must be used in place of haskell_static_content. It means that now files contents will be copied and freed on every single request to location /static.

To address unnecessary copying of static data, a new directive haskell_unsafe_content is introduced. With it the above example can be rewritten as follows.

Haskell content handler.

fromFile (tailSafe . C8.unpack -> f) =
    case lookup f $(embedDir "/rootpath") of
        Just p  -> (p,                                text_plain, 200)
        Nothing -> (packLiteral 14 "File not found"#, text_plain, 404)
    where packLiteral l s = unsafePerformIO $ unsafePackAddressLen l s
          text_plain = packLiteral 10 "text/plain"#
NGX_EXPORT_UNSAFE_HANDLER (fromFile)

Corresponding nginx location.

        location /static {
            haskell_unsafe_content fromFile $uri;
        }

The unsafe handler returns 3tuple(strictByteString,strictByteString,Int). The two strict bytestrings in it must correspond to the really static data, i.e. string literals like "File not found"#, "text/plain"# and those embedded by the Data.FileEmbed, otherwise the nasty things may happen! Literal strings that end with hashes (#) are actually addresses of compiled static byte arrays that do not change during runtime. To enable the hash literals option -XMagicHash must be added into the directive haskell ghc_extra_options. Working on such a low level requires using functions unsafePackAddressLen and unsafePerformIO from modules Data.ByteString.Unsafe and System.IO.Unsafe respectively (in this example unsafePerformIO can be safely replaced with the fastest and the unsafest IO unwrapper accursedUnutterablePerformIO from module Data.ByteString.Internal). Minimum requirements for using static byte arrays in the module Data.FileEmbed are: file-embed version 0.0.7 and Template Haskell version 2.5.0 (bundled with ghc since version 7.0.1).

The unsafe content handler implementation from the above example can be found in file test/tsung/nginx-static.conf.



1  Did you read the notice in the beginning of the section? Yes, lazy bytestrings contents can be safely passed to the C side directly, provided stable pointers (StablePtr) to them are passed too. Creating a stable pointer to a bytestring makes it a root object that is guaranteed not to be garbage collected while the pointer is not freed. The bytestring itself can be relocated, but its buffers not! They are stored in pinned memory arrays that are not moved while the bytestring is alive.

Asynchronous tasks with side effects

All variable handlers we met so far were pure haskell functions without side effects. Inability to put side effects into pure functions has a great significance in the sense that it gives strong guarantees about the time the functions run. In haskell, functions that may produce side effects are normally wrapped inside IO monad. They can do various non-deterministic IO computations like reading or writing files, connecting to network servers etc., which, in principle, may last unpredictably long or even eternally. Despite this, having IO functions as nginx variable handlers are extremely tempting as it makes possible to perform arbitrary IO tasks during an HTTP request. To eliminate their non-probabilistic duration downside, they could be run asynchronously in green threads provided by the haskell RTS library, and somehow signal the nginx worker's main thread after their computations finish. This is exactly what happens in special handler NGX_EXPORT_ASYNC_IOY_Y. Consider the following example.

user                    nobody;
worker_processes        2;

events {
    worker_connections  1024;
}

http {
    default_type        application/octet-stream;
    sendfile            on;

    haskell compile threaded standalone /tmp/ngx_haskell.hs '

import qualified Data.ByteString.Char8 as C8
import qualified Data.ByteString.Lazy.Char8 as C8L
import           Network.HTTP.Client
import           Control.Concurrent
import           Control.Exception
import           Safe

catchHttpException = (`catch` \e ->
        return $ C8L.pack $ "HTTP EXCEPTION: " ++ show (e :: HttpException))

getResponse (C8.unpack -> url) = fmap responseBody . (parseRequest url >>=)

getUrl url = do
    man <- newManager defaultManagerSettings
    catchHttpException $ getResponse url $ flip httpLbs man
NGX_EXPORT_ASYNC_IOY_Y (getUrl)

threadDelaySec = threadDelay . (* 10^6)

delay (readDef 0 . C8.unpack -> v) =
    threadDelaySec v >> return (C8L.pack $ show v)
NGX_EXPORT_ASYNC_IOY_Y (delay)

    ';

    server {
        listen       8010;
        server_name  main;
        error_log    /tmp/nginx-test-haskell-error.log;
        access_log   /tmp/nginx-test-haskell-access.log;

        location / {
            
                      

鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap