1 | 1 | | module Hasura.Server.Compression |
2 | 2 | | ( compressResponse, |
3 | 3 | | CompressionType (..), |
| 4 | + | EncodingType, |
| 5 | + | identityEncoding, |
| 6 | + | contentEncodingHeader, |
4 | 7 | | compressionTypeToTxt, |
| 8 | + | compressFast, |
| 9 | + | |
| 10 | + | -- * exported for testing |
| 11 | + | getAcceptedEncodings, |
5 | 12 | | ) |
6 | 13 | | where |
7 | 14 | | |
8 | 15 | | import Codec.Compression.GZip qualified as GZ |
9 | 16 | | import Data.ByteString.Lazy qualified as BL |
| 17 | + | import Data.Set qualified as Set |
10 | 18 | | import Data.Text qualified as T |
11 | 19 | | import Hasura.Prelude |
12 | 20 | | import Hasura.Server.Utils (gzipHeader) |
13 | 21 | | import Network.HTTP.Types.Header qualified as NH |
14 | 22 | | |
| 23 | + | -- | Compressed encodings which hasura supports |
15 | 24 | | data CompressionType |
16 | 25 | | = CTGZip |
17 | | - | deriving (Show, Eq) |
| 26 | + | deriving (Show, Eq, Ord) |
| 27 | + | |
| 28 | + | -- | Accept-Encoding directives (from client) which hasura supports. @Nothing@ |
| 29 | + | -- indicates identity (no compression) |
| 30 | + | type EncodingType = Maybe CompressionType |
| 31 | + | |
| 32 | + | identityEncoding :: EncodingType |
| 33 | + | identityEncoding = Nothing |
18 | 34 | | |
19 | 35 | | compressionTypeToTxt :: CompressionType -> Text |
20 | 36 | | compressionTypeToTxt CTGZip = "gzip" |
21 | 37 | | |
| 38 | + | -- | A map from Accept-Encoding directives to corresponding Content-Encoding |
| 39 | + | -- headers (from server). NOTE: @identity@ is not a valid directive for this |
| 40 | + | -- header. |
| 41 | + | contentEncodingHeader :: CompressionType -> NH.Header |
| 42 | + | contentEncodingHeader CTGZip = gzipHeader |
| 43 | + | |
| 44 | + | -- | Maybe compress the response body, based on the client's Accept-Encoding |
| 45 | + | -- and our own judgement. |
22 | 46 | | compressResponse :: |
23 | 47 | | NH.RequestHeaders -> |
24 | 48 | | BL.ByteString -> |
25 | | - | (BL.ByteString, Maybe NH.Header, Maybe CompressionType) |
26 | | - | compressResponse reqHeaders unCompressedResp = |
27 | | - | let compressionTypeM = getRequestedCompression reqHeaders |
28 | | - | appendCompressionType (res, headerM) = (res, headerM, compressionTypeM) |
29 | | - | gzipCompressionParams = |
30 | | - | GZ.defaultCompressParams {GZ.compressLevel = GZ.compressionLevel 1} |
31 | | - | in appendCompressionType $ case compressionTypeM of |
32 | | - | Just CTGZip -> (GZ.compressWith gzipCompressionParams unCompressedResp, Just gzipHeader) |
33 | | - | Nothing -> (unCompressedResp, Nothing) |
| 49 | + | -- | The response body (possibly compressed), and the encoding chosen |
| 50 | + | (BL.ByteString, EncodingType) |
| 51 | + | compressResponse reqHeaders unCompressedResp |
| 52 | + | -- we have option to gzip: |
| 53 | + | | acceptedEncodings == Set.fromList [identityEncoding, Just CTGZip] |
| 54 | + | || |
| 55 | + | -- we must gzip: |
| 56 | + | acceptedEncodings == Set.fromList [Just CTGZip] = |
| 57 | + | (compressFast CTGZip unCompressedResp, Just CTGZip) |
| 58 | + | -- we must only return an uncompressed response: |
| 59 | + | | acceptedEncodings == Set.fromList [identityEncoding] = |
| 60 | + | (unCompressedResp, identityEncoding) |
| 61 | + | -- this is technically a client error, but ignore for now (maintaining |
| 62 | + | -- current behavior); assume identity: |
| 63 | + | | otherwise = |
| 64 | + | (unCompressedResp, identityEncoding) |
| 65 | + | where |
| 66 | + | acceptedEncodings = getAcceptedEncodings reqHeaders |
34 | 67 | | |
35 | | - | getRequestedCompression :: NH.RequestHeaders -> Maybe CompressionType |
36 | | - | getRequestedCompression reqHeaders |
37 | | - | | "gzip" `elem` acceptEncodingVals = Just CTGZip |
38 | | - | | otherwise = Nothing |
| 68 | + | -- | Compress using |
| 69 | + | compressFast :: CompressionType -> BL.ByteString -> BL.ByteString |
| 70 | + | compressFast = \case |
| 71 | + | CTGZip -> GZ.compressWith gzipCompressionParams |
39 | 72 | | where |
40 | | - | acceptEncodingVals = |
| 73 | + | gzipCompressionParams = |
| 74 | + | -- See Note [Compression ratios] |
| 75 | + | GZ.defaultCompressParams {GZ.compressLevel = GZ.compressionLevel 1} |
| 76 | + | |
| 77 | + | -- | Which encodings can the client accept? The empty set returned here is an |
| 78 | + | -- error condition and the server tecnically ought to return a 406. |
| 79 | + | -- |
| 80 | + | -- https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding |
| 81 | + | getAcceptedEncodings :: NH.RequestHeaders -> Set.Set EncodingType |
| 82 | + | getAcceptedEncodings reqHeaders = Set.fromList acceptedEncodingTypes |
| 83 | + | where |
| 84 | + | rawHeaderVals = |
41 | 85 | | concatMap (splitHeaderVal . snd) $ |
42 | 86 | | filter (\h -> fst h == NH.hAcceptEncoding) reqHeaders |
43 | 87 | | splitHeaderVal bs = map T.strip $ T.splitOn "," $ bsToTxt bs |
| 88 | + | -- we'll ignore qvalues, except (crucially) to determine if 'identity' is rejected: |
| 89 | + | identityRejected = |
| 90 | + | -- ...if we're explicitly rejecting identity, or... |
| 91 | + | "identity;q=0" `elem` rawHeaderVals |
| 92 | + | || |
| 93 | + | -- ...rejecting anything not listed and identity is not listed |
| 94 | + | ( "*;q=0" `elem` rawHeaderVals |
| 95 | + | && (not $ any ("identity" `T.isPrefixOf`) rawHeaderVals) |
| 96 | + | ) |
| 97 | + | gzipAccepted = |
| 98 | + | any ("gzip" `T.isPrefixOf`) rawHeaderVals |
| 99 | + | && ("gzip;q=0" `notElem` rawHeaderVals) |
| 100 | + | -- AFAICT missing header, or *, implies “send whatever you want” |
| 101 | + | -- https://www.rfc-editor.org/rfc/rfc7231#section-5.3.4 |
| 102 | + | anyEncodingTechnicallyAcceptable = |
| 103 | + | null rawHeaderVals || rawHeaderVals == ["*"] |
| 104 | + | acceptedEncodingTypes |
| 105 | + | -- \| anyEncodingTechnicallyAcceptable = [Just CTGZip, identityEncoding] |
| 106 | + | -- NOTE!: For now to be conservative and maintain historical behavior we |
| 107 | + | -- will treat this case as “only identity is acceptable”: |
| 108 | + | | anyEncodingTechnicallyAcceptable = [identityEncoding] |
| 109 | + | | otherwise = |
| 110 | + | (guard gzipAccepted $> Just CTGZip) |
| 111 | + | <> (guard (not identityRejected) $> identityEncoding) |
| 112 | + | |
| 113 | + | {- |
| 114 | + | Note [Compression ratios] |
| 115 | + | ~~~~~~~~~~~~~~~~~~~~~~~~~ |
| 116 | + | |
| 117 | + | I did some measurements of compression ratios at `gzip -1` (libc) of some |
| 118 | + | randomly generated json, real json datasets, and output from our benchmarked |
| 119 | + | chinook queries: |
| 120 | + | |
| 121 | + | 2552/6131 = 0.41 |
| 122 | + | 4666/8718 = 0.53 |
| 123 | + | 13921/27131 = 0.51 |
| 124 | + | 5895/8879 = 0.66 <----- completely random strings |
| 125 | + | 8634/28261 = 0.30 |
| 126 | + | 70422/372466 = 0.18 |
| 127 | + | |
| 128 | + | 200/600 = 0.33 <----| from chinook graphql benchmarks |
| 129 | + | 3000/33000 = 0.09 <----| |
| 130 | + | 13000/190000 = 0.07 <----' |
| 131 | + | |
| 132 | + | Given these numbers I would suggest using a rule-of-thumb expected compression |
| 133 | + | ratio between 2:1 and 10:1, depending on what being conservative means in the |
| 134 | + | context. |
| 135 | + | |
| 136 | + | I didn't test higher compression levels much, but `gzip -4` for the most part |
| 137 | + | resulted in less than 10% smaller output on random json, and ~30% on our highly |
| 138 | + | compressible benchmark output. |
| 139 | + | |
| 140 | + | UPDATE (12/5): |
| 141 | + | ~~~~~~~~~~~~~ |
| 142 | + | |
| 143 | + | Some recent data on compression ratios for graphql responsed (here as: |
| 144 | + | compressed_size / uncompressed_size) taken from cloud: |
| 145 | + | |
| 146 | + | Aggregate across all responses where uncompressed > 700 bytes: |
| 147 | + | |
| 148 | + | max: 0.891 (worst compression) |
| 149 | + | p99: 0.658 |
| 150 | + | p95: 0.565 |
| 151 | + | p75: 0.467 |
| 152 | + | p50: 0.346 |
| 153 | + | min: 0.005 (best compression) |
| 154 | + | |
| 155 | + | Aggregate across responses where uncompressed > 17K bytes (90th percentile): |
| 156 | + | |
| 157 | + | max: 0.773 |
| 158 | + | p99: 0.414 |
| 159 | + | p95: 0.304 |
| 160 | + | p75: 0.202 |
| 161 | + | p50: 0.172 |
| 162 | + | min: 0.005 |
| 163 | + | |
| 164 | + | -} |
44 | 165 | | |