在线时间:8:00-16:00
迪恩网络APP
随时随地掌握行业动态
扫描二维码
关注迪恩网络微信公众号
开源软件名称(OpenSource Name):lyokha/nginx-haskell-module开源软件地址(OpenSource Url):https://github.com/lyokha/nginx-haskell-module开源编程语言(OpenSource Language):C 74.1%开源软件介绍(OpenSource Introduction):This Nginx module allows compiling and running Haskell source code found in a configuration file or an existing shared library. It allows for writing in Haskell synchronous variable handlers, asynchronous tasks, services (i.e. asynchronous tasks that are not bound to requests), shared services (i.e. services that work exclusively on a single Nginx worker process all the time), content handlers and POST request handlers. Table of contents
Motivational exampleuser nobody;
worker_processes 2;
events {
worker_connections 1024;
}
http {
default_type application/octet-stream;
sendfile on;
haskell ghc_extra_options
-ignore-package regex-pcre
-XFlexibleInstances -XMagicHash -XTupleSections;
haskell compile standalone /tmp/ngx_haskell.hs '
import qualified Data.Char as C
import Text.Regex.PCRE
import Data.Aeson
import Data.Maybe
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Char8 as C8L
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as C8
import Data.ByteString.Unsafe
import Data.ByteString.Internal (accursedUnutterablePerformIO)
import Text.Pandoc
import Text.Pandoc.Builder
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Data.Function (on)
import Control.Monad
import Control.Exception
import System.IO.Unsafe (unsafePerformIO)
import Safe
toUpper = map C.toUpper
NGX_EXPORT_S_S (toUpper)
takeN = take . readDef 0
NGX_EXPORT_S_SS (takeN)
NGX_EXPORT_S_S (reverse)
-- does not match when any of the 2 args is empty or not decodable
matches = (fromMaybe False .) . liftM2 (=~) `on` (doURLDecode =<<) . toMaybe
where toMaybe [] = Nothing
toMaybe a = Just a
NGX_EXPORT_B_SS (matches)
firstNotEmpty = headDef "" . filter (not . null)
NGX_EXPORT_S_LS (firstNotEmpty)
isInList [] = False
isInList (x : xs) = x `elem` xs
NGX_EXPORT_B_LS (isInList)
jSONListOfInts :: B.ByteString -> Maybe [Int]
jSONListOfInts = (decode =<<) . doURLDecode . L.fromStrict
isJSONListOfInts = isJust . jSONListOfInts
NGX_EXPORT_B_Y (isJSONListOfInts)
jSONListOfIntsTakeN x = encode $ maybe [] (take n) $ jSONListOfInts y
where (readDef 0 . C8.unpack -> n, B.tail -> y) = B.break (== 124) x
NGX_EXPORT_Y_Y (jSONListOfIntsTakeN)
class UrlDecodable a
where doURLDecode :: a -> Maybe a
instance UrlDecodable String where
-- adopted from
-- http://www.rosettacode.org/wiki/URL_decoding#Haskell
doURLDecode [] = Just []
doURLDecode (\'%\' : xs) =
case xs of
(a : b : xss) ->
(:) . C.chr <$> readMay (\'0\' : \'x\' : [a, b])
<*> doURLDecode xss
_ -> Nothing
doURLDecode (\'+\' : xs) = (\' \' :) <$> doURLDecode xs
doURLDecode (x : xs) = (x :) <$> doURLDecode xs
instance UrlDecodable L.ByteString where
-- adopted for ByteString arguments from
-- http://www.rosettacode.org/wiki/URL_decoding#Haskell
doURLDecode (L.null -> True) = Just L.empty
doURLDecode (L.uncons -> Just (37, xs))
| L.length xs > 1 =
let (C8L.unpack -> c, xss) = L.splitAt 2 xs
in L.cons <$> readMay (\'0\' : \'x\' : c)
<*> doURLDecode xss
| otherwise = Nothing
doURLDecode (L.uncons -> Just (43, xs)) = (32 `L.cons`) <$> doURLDecode xs
doURLDecode (L.uncons -> Just (x, xs)) = (x `L.cons`) <$> doURLDecode xs
urlDecode = fromMaybe "" . doURLDecode
NGX_EXPORT_S_S (urlDecode)
-- compatible with Pandoc 2.8 (will not compile for older versions)
simpleHtmlTemplate = unsafePerformIO $ do
t <- compileTemplate "" $ T.pack "<html>\\n<body>\\n$body$</body></html>"
return $ case t of
Right a -> a
Left e -> error e
{-# NOINLINE simpleHtmlTemplate #-}
fromMd (T.decodeUtf8 -> x) = uncurry (, packLiteral 9 "text/html"#, , []) $
case runPure $ readMarkdown def x >>= writeHtml of
Right p -> (fromText p, 200)
Left (T.pack . displayException -> e) ->
(case runPure $ writeError e of
Right p -> fromText p
Left _ -> fromText e, 500)
where packLiteral l s =
accursedUnutterablePerformIO $ unsafePackAddressLen l s
fromText = C8L.fromStrict . T.encodeUtf8
writeHtml = writeHtml5String htmlWriterOptions
writeError = writeHtml . doc . para . singleton . Str
htmlWriterOptions = def { writerTemplate = Just simpleHtmlTemplate }
NGX_EXPORT_HANDLER (fromMd)
toYesNo "0" = "No"
toYesNo "1" = "Yes"
toYesNo _ = "Unknown"
NGX_EXPORT_S_S (toYesNo)
';
server {
listen 8010;
server_name main;
error_log /tmp/nginx-test-haskell-error.log;
access_log /tmp/nginx-test-haskell-access.log;
location / {
haskell_run toUpper $hs_a $arg_a;
echo "toUpper ($arg_a) = $hs_a";
if ($arg_b) {
haskell_run takeN $hs_a $arg_b $arg_a;
echo "takeN ($arg_a, $arg_b) = $hs_a";
break;
}
if ($arg_c) {
haskell_run reverse $hs_a $arg_c;
echo "reverse ($arg_c) = $hs_a";
break;
}
if ($arg_d) {
haskell_run matches $hs_a $arg_d $arg_a;
haskell_run urlDecode $hs_b $arg_a;
echo "matches ($arg_d, $hs_b) = $hs_a";
break;
}
if ($arg_e) {
haskell_run firstNotEmpty $hs_a $arg_f $arg_g $arg_a;
echo "firstNotEmpty ($arg_f, $arg_g, $arg_a) = $hs_a";
break;
}
if ($arg_l) {
haskell_run isInList $hs_a $arg_a secret1 secret2 secret3;
echo "isInList ($arg_a, <secret words>) = $hs_a";
break;
}
if ($arg_m) {
haskell_run isJSONListOfInts $hs_a $arg_m;
haskell_run urlDecode $hs_b $arg_m;
echo "isJSONListOfInts ($hs_b) = $hs_a";
break;
}
if ($arg_n) {
haskell_run jSONListOfIntsTakeN $hs_a $arg_take|$arg_n;
haskell_run urlDecode $hs_b $arg_n;
echo "jSONListOfIntsTakeN ($hs_b, $arg_take) = $hs_a";
break;
}
}
location /content {
haskell_run isJSONListOfInts $hs_a $arg_n;
haskell_run toYesNo $hs_b $hs_a;
haskell_run jSONListOfIntsTakeN $hs_c $arg_take|$arg_n;
haskell_run urlDecode $hs_d $arg_n;
haskell_content fromMd "
## Do some JSON parsing
### Given ``$hs_d``
* Is this list of integer numbers?
+ *$hs_b*
* Take $arg_take elements
+ *``$hs_c``*
";
}
}
} Haskell source code is loaded with directives haskell compile or haskell load. Both directives accept an absolute path to a haskell source file as their first argument and a haskell source code as their second argument. The code is getting saved to the path and compiled to a shared library when nginx starts. The directives have a subtle distinction: haskell compile always requires the code argument and runs compiler unconditionally, whereas haskell load checks if the target library exists and does not compile source code in this case, thus eliminating necessity of the source code argument. The module may load an arbitrary haskell code but only those functions are accessible from nginx that are exported with special macros NGX_EXPORT_S_S, NGX_EXPORT_S_SS, NGX_EXPORT_B_S and NGX_EXPORT_B_SS (here S_S, S_SS, B_S and B_SS stand for mnemonic types returns-String-accepts-String, returns-String-accepts-String-String, returns-Bool-accepts-String and returns-Bool-accepts-String-String), their list counterparts NGX_EXPORT_S_LS and NGX_EXPORT_B_LS (LS stands for List-of-Strings) and two macros that deal with bytestrings: NGX_EXPORT_Y_Y and NGX_EXPORT_B_Y (Y stands for bYte). For the sake of efficiency, bytestring macros accept strict but return (only Y_Y) lazy bytestrings. Effectively this means that only those functions are supported that return strings, bytestrings or booleans and accept one, two or more (only S_LS and B_LS) string arguments or one bytestring. In this example 10 custom haskell functions are exported: toUpper, takeN, reverse (which is normal reverse imported from Prelude), matches (which requires module Text.Regex.PCRE), firstNotEmpty, isInList, isJSONListOfInts, jSONListOfIntsTakeN, urlDecode and toYesNo. In my case this code won't compile due to ambiguity involved by presence of the two installed packages regex-pcre and regex-pcre-builtin, so I had to add an extra ghc compilation flag -ignore-package regex-pcre using directive haskell ghc_extra_options. Other flags include -XFlexibleInstances which allows declaration of instance UrlDecodable String. Class UrlDecodable provides function doURLDecode for decoding strings and bytestrings that was adopted from here. The bytestring instance of doURLDecode makes use of view patterns in its clauses, however this extension does not have to be declared explicitly because it was already enabled in a pragma from the wrapping haskell code provided by this module (see details in section Wrapping haskell code organization). In several clauses of doURLDecode there are explicit characters wrapped inside single quotes which are in turn escaped with backslashes to not confuse nginx parser as the haskell code itself is wrapped inside single quotes. Exported function urlDecode is defined via the string instance of doURLDecode: if decoding fails it returns an empty string. Let's look inside the server clause, in location / where the exported
haskell functions are used. Directive haskell_run takes three or more
arguments: it depends on the type of the exported function (S_S, S_SS etc.).
The first argument of the directive is the name of an exported haskell function,
the second argument is a custom variable where the function's return value will
be stored, and the remaining (one or two) arguments are complex values (in the
nginx notion: it means that they may contain arbitrary number of variables and
plain symbols) that correspond to the arguments of the exported function.
Directive haskell_run is allowed in server, location and location-if
clauses. In this example all returned strings are stored in the same variable
There is another haskell directive haskell_content which accepts a haskell function to generate HTTP response and an optional string that will be passed to the function. The function must be of one of the two types: strictByteString-to-lazyByteString and strictByteString-to-4tuple(lazyByteString,strictByteString,Int, list-of-pairs-of-strictByteStrings). It must be exported with NGX_EXPORT_DEF_HANDLER (default content handler) in the first case and NGX_EXPORT_HANDLER in the second case. The elements in the 4tuple correspond to returned content, its type (e.g. text/html etc.), HTTP status, and a list of custom response headers. Default content handler sets content type to text/plain and HTTP status to 200. Directive haskell_content is allowed in location and location-if clauses of the nginx configuration. In the location /content from the above example the directive haskell_content makes use of a haskell function fromMd to generate HTML response from a markdown text. Function fromMd translates a markdown text to HTML using Pandoc library. Notice that content type is built from a string literal with a magic hash at the end to avoid unnecessary expenses (see details about using string literals in section Optimized unsafe content handler). What about doing some tests? Let's first start nginx (in this example, from the directory where file nginx.conf is located). # nginx -c`pwd`/nginx.conf
[1 of 1] Compiling NgxHaskellUserRuntime ( /tmp/ngx_haskell.hs, /tmp/ngx_haskell.o )
Linking /tmp/ngx_haskell.so ... Nginx compiles haskell code at its start. Had compilation failed and nginx would not have started (see details about starting nginx in section Reloading of haskell code and static content). In this case the code is OK and we are moving forward. $ curl 'http://localhost:8010/?a=hello_world'
toUpper (hello_world) = HELLO_WORLD
$ curl 'http://localhost:8010/?a=hello_world&b=4'
takeN (hello_world, 4) = hell
$ curl 'http://localhost:8010/?a=hello_world&b=oops'
takeN (hello_world, oops) =
$ curl 'http://localhost:8010/?c=intelligence'
reverse (intelligence) = ecnegilletni
$ curl 'http://localhost:8010/?d=intelligence&a=%5Ei' # URL-encoded ^i
matches (intelligence, ^i) = 1
$ curl 'http://localhost:8010/?d=intelligence&a=%5EI' # URL-encoded ^I
matches (intelligence, ^I) = 0
$ curl 'http://localhost:8010/?e=1&g=intelligence&a=smart'
firstNotEmpty (, intelligence, smart) = intelligence
$ curl 'http://localhost:8010/?e=1&g=intelligence&f=smart'
firstNotEmpty (smart, intelligence, ) = smart
$ curl 'http://localhost:8010/?e=1'
firstNotEmpty (, , ) =
$ curl 'http://localhost:8010/?l=1'
isInList (, <secret words>) = 0
$ curl 'http://localhost:8010/?l=1&a=s'
isInList (s, <secret words>) = 0
$ curl 'http://localhost:8010/?l=1&a=secret2'
isInList (secret2, <secret words>) = 1
$ curl 'http://localhost:8010/?m=%5B1%2C2%2C3%5D' # URL-encoded [1,2,3]
isJSONListOfInts ([1,2,3]) = 1
$ curl 'http://localhost:8010/?m=unknown'
isJSONListOfInts (unknown) = 0
$ curl 'http://localhost:8010/?n=%5B10%2C20%2C30%2C40%5D&take=3' # URL-encoded [10,20,30,40]
jSONListOfIntsTakeN ([10,20,30,40], 3) = [10,20,30]
$ curl 'http://localhost:8010/?n=%5B10%2C20%2C30%2C40%5D&take=undefined'
jSONListOfIntsTakeN ([10,20,30,40], undefined) = [] Let's try location /content (in a browser it looks great!) $ curl -D- 'http://localhost:8010/content?n=%5B10%2C20%2C30%2C40%5D&take=3'
HTTP/1.1 200 OK
Server: nginx/1.16.0
Date: Wed, 27 Nov 2019 12:13:46 GMT
Content-Type: text/html
Content-Length: 277
Connection: keep-alive
<html>
<body>
<h2>Do some JSON parsing</h2>
<h3>Given <code>[10,20,30,40]</code></h3>
<ul>
<li><p>Is this list of integer numbers?</p>
<ul>
<li><em>Yes</em></li>
</ul></li>
<li><p>Take 3 elements</p>
<ul>
<li><em><code>[10,20,30]</code></em></li>
</ul></li>
</ul></body></html> Static content in HTTP responsesReading files in runtime inescapably drops nginx performance. Fortunately there is a haskell module Data.FileEmbed that makes it possible to embed files during ghc compilation time. Consider the following haskell content handler fromFile (C8.unpack -> "content.html") =
(L.fromStrict $(embedFile "/path/to/content.html"), "text/html", 200, [])
fromFile (C8.unpack -> "content.txt") =
(L.fromStrict $(embedFile "/path/to/content.txt"), "text/plain", 200, [])
fromFile _ =
(C8L.pack "File not found", "text/plain", 500, [])
NGX_EXPORT_HANDLER (fromFile) (to make it compile another option -XTemplateHaskell must be added into the directive haskell ghc_extra_options and the module Data.FileEmbed must be imported too). Now with a new location location /static {
haskell_static_content fromFile "content.html";
if ($arg_a) {
haskell_static_content fromFile "content.txt";
break;
}
} HTTP requests with URIs that start with /static will be responded with contents of files listed in the clauses of the function fromFile that have been embedded into the function during ghc compilation. Directive haskell_static_content runs its haskell handler and allocates response data only once in nginx worker's lifetime when the first request arrives and is processed in the location. On further requests these data are sent back without running the haskell handler. This makes directive haskell_static_content more optimal for returning static data comparing with haskell_content. Directive haskell_static_content is useful not only for returning files but for any content that can be evaluated only once in nginx worker's lifetime. Optimized unsafe content handlerNotice that starting from version 1.3 of this module, all content handlers do not pass copies to the C side! Instead, the underlying lazy bytestrings share their contents with nginx. So all the reasons about extra copying below are no longer actual. Let's go back to the example from the previous section. All the content handlers we met so far receive a copy of data produced in haskell handlers. Using references to the original data would lead to nasty things after haskell's garbage collector wakeup, so the only safe choice seems to be copying the original data1. Handler fromFile from the example takes static data embedded into the haskell library by Data.FileEmbed, makes a copy of this and passes it to the C code. It runs once per location during location configuration lifetime thanks to the directive haskell_static_content implementation. Nonetheless there are two duplicate static data copies in the program during its run which looks wasteful. It can get even worse when using haskell_static_content is not an option. Here is an example. Module Data.FileEmbed allows embedding all files in a directory recursively using template function embedDir. This make it possible to emulate nginx static files delivery feature. The following is a quick and dirty implementation. Haskell content handler. fromFile (tailSafe . C8.unpack -> f) =
case lookup f $(embedDir "/rootpath") of
Just p -> (L.fromStrict p, "text/plain", 200, [])
Nothing -> (C8L.pack "File not found", "text/plain", 404, [])
NGX_EXPORT_HANDLER (fromFile) Corresponding nginx location. location /static {
haskell_content fromFile $uri;
} In this example the files are expected in the directory /rootpath/static. As
soon as the target file is parameterized by the value of the To address unnecessary copying of static data, a new directive haskell_unsafe_content is introduced. With it the above example can be rewritten as follows. Haskell content handler. fromFile (tailSafe . C8.unpack -> f) =
case lookup f $(embedDir "/rootpath") of
Just p -> (p, text_plain, 200)
Nothing -> (packLiteral 14 "File not found"#, text_plain, 404)
where packLiteral l s = unsafePerformIO $ unsafePackAddressLen l s
text_plain = packLiteral 10 "text/plain"#
NGX_EXPORT_UNSAFE_HANDLER (fromFile) Corresponding nginx location. location /static {
haskell_unsafe_content fromFile $uri;
} The unsafe handler returns 3tuple(strictByteString,strictByteString,Int). The two strict bytestrings in it must correspond to the really static data, i.e. string literals like "File not found"#, "text/plain"# and those embedded by the Data.FileEmbed, otherwise the nasty things may happen! Literal strings that end with hashes (#) are actually addresses of compiled static byte arrays that do not change during runtime. To enable the hash literals option -XMagicHash must be added into the directive haskell ghc_extra_options. Working on such a low level requires using functions unsafePackAddressLen and unsafePerformIO from modules Data.ByteString.Unsafe and System.IO.Unsafe respectively (in this example unsafePerformIO can be safely replaced with the fastest and the unsafest IO unwrapper accursedUnutterablePerformIO from module Data.ByteString.Internal). Minimum requirements for using static byte arrays in the module Data.FileEmbed are: file-embed version 0.0.7 and Template Haskell version 2.5.0 (bundled with ghc since version 7.0.1). The unsafe content handler implementation from the above example can be found in file test/tsung/nginx-static.conf. 1 Did you read the notice in the beginning of the section? Yes, lazy bytestrings contents can be safely passed to the C side directly, provided stable pointers (StablePtr) to them are passed too. Creating a stable pointer to a bytestring makes it a root object that is guaranteed not to be garbage collected while the pointer is not freed. The bytestring itself can be relocated, but its buffers not! They are stored in pinned memory arrays that are not moved while the bytestring is alive. Asynchronous tasks with side effectsAll variable handlers we met so far were pure haskell functions without side effects. Inability to put side effects into pure functions has a great significance in the sense that it gives strong guarantees about the time the functions run. In haskell, functions that may produce side effects are normally wrapped inside IO monad. They can do various non-deterministic IO computations like reading or writing files, connecting to network servers etc., which, in principle, may last unpredictably long or even eternally. Despite this, having IO functions as nginx variable handlers are extremely tempting as it makes possible to perform arbitrary IO tasks during an HTTP request. To eliminate their non-probabilistic duration downside, they could be run asynchronously in green threads provided by the haskell RTS library, and somehow signal the nginx worker's main thread after their computations finish. This is exactly what happens in special handler NGX_EXPORT_ASYNC_IOY_Y. Consider the following example. user nobody;
worker_processes 2;
events {
worker_connections 1024;
}
http {
default_type application/octet-stream;
sendfile on;
haskell compile threaded standalone /tmp/ngx_haskell.hs '
import qualified Data.ByteString.Char8 as C8
import qualified Data.ByteString.Lazy.Char8 as C8L
import Network.HTTP.Client
import Control.Concurrent
import Control.Exception
import Safe
catchHttpException = (`catch` \e ->
return $ C8L.pack $ "HTTP EXCEPTION: " ++ show (e :: HttpException))
getResponse (C8.unpack -> url) = fmap responseBody . (parseRequest url >>=)
getUrl url = do
man <- newManager defaultManagerSettings
catchHttpException $ getResponse url $ flip httpLbs man
NGX_EXPORT_ASYNC_IOY_Y (getUrl)
threadDelaySec = threadDelay . (* 10^6)
delay (readDef 0 . C8.unpack -> v) =
threadDelaySec v >> return (C8L.pack $ show v)
NGX_EXPORT_ASYNC_IOY_Y (delay)
';
server {
listen 8010;
server_name main;
error_log /tmp/nginx-test-haskell-error.log;
access_log /tmp/nginx-test-haskell-access.log;
location / {
|
2023-10-27
2022-08-15
2022-08-17
2022-09-23
2022-08-13
请发表评论