Projects STRLCPY graphql-engine Commits bb46cba5
🤬
  • server: refactor Hasura.Server.Compression for clarity/correctness

    context: This is  foundation work, before we change how the server chooses to compress or not
    part of effort: #5518
    
    -----
    
    Prior to this change it was difficult to understand how the functionality in this module related to the semantics of Accept-Encoding. We also didn't correctly handle directives with qvalues.
    
    After this change certain technical infelicities are called out without modifying the behavior of the server; for instance we continue to fall back to identity (no compression) in the case where technically we're supposed to return 406, and we also  continue to treat `*` conservatively as meaning “use no compression”.
    
    The only external change here is `gzip;q=x.y` now results in a zipped response.
    
    PR-URL: https://github.com/hasura/graphql-engine-mono/pull/7213
    GitOrigin-RevId: 93fdbf4ac27c5e9daa15b51842dee9ad9d5af140
  • Loading...
  • Brandon Simmons committed with hasura-bot 2 months ago
    bb46cba5
    1 parent d831d983
  • ■ ■ ■ ■ ■
    server/graphql-engine.cabal
    skipped 1053 lines
    1054 1054   Hasura.RQL.WebhookTransformsSpec
    1055 1055   Hasura.Server.Auth.JWTSpec
    1056 1056   Hasura.Server.AuthSpec
     1057 + Hasura.Server.CompressionSpec
    1057 1058   Hasura.Server.InitSpec
    1058 1059   Hasura.Server.Init.ArgSpec
    1059 1060   Hasura.Server.MigrateSuite
    skipped 289 lines
  • ■ ■ ■ ■ ■ ■
    server/src-lib/Hasura/Server/App.hs
    skipped 385 lines
    386 386   let (respBytes, respHeaders) = case result of
    387 387   JSONResp (HttpResponse encJson h) -> (encJToLBS encJson, pure jsonHeader <> h)
    388 388   RawResp (HttpResponse rawBytes h) -> (rawBytes, h)
    389  - (compressedResp, mEncodingHeader, mCompressionType) = compressResponse (Wai.requestHeaders waiReq) respBytes
    390  - encodingHeader = onNothing mEncodingHeader []
     389 + (compressedResp, encodingType) = compressResponse (Wai.requestHeaders waiReq) respBytes
     390 + encodingHeader = maybeToList (contentEncodingHeader <$> encodingType)
    391 391   reqIdHeader = (requestIdHeader, txtToBs $ unRequestId reqId)
    392 392   allRespHeaders = pure reqIdHeader <> encodingHeader <> respHeaders <> authHdrs
    393  - lift $ logHttpSuccess scLogger scLoggingSettings userInfo reqId waiReq req respBytes compressedResp qTime mCompressionType reqHeaders httpLoggingMetadata
     393 + lift $ logHttpSuccess scLogger scLoggingSettings userInfo reqId waiReq req respBytes compressedResp qTime encodingType reqHeaders httpLoggingMetadata
    394 394   mapM_ setHeader allRespHeaders
    395 395   Spock.lazyBytes compressedResp
    396 396   
    skipped 791 lines
  • ■ ■ ■ ■ ■
    server/src-lib/Hasura/Server/Compression.hs
    1 1  module Hasura.Server.Compression
    2 2   ( compressResponse,
    3 3   CompressionType (..),
     4 + EncodingType,
     5 + identityEncoding,
     6 + contentEncodingHeader,
    4 7   compressionTypeToTxt,
     8 + compressFast,
     9 + 
     10 + -- * exported for testing
     11 + getAcceptedEncodings,
    5 12   )
    6 13  where
    7 14   
    8 15  import Codec.Compression.GZip qualified as GZ
    9 16  import Data.ByteString.Lazy qualified as BL
     17 +import Data.Set qualified as Set
    10 18  import Data.Text qualified as T
    11 19  import Hasura.Prelude
    12 20  import Hasura.Server.Utils (gzipHeader)
    13 21  import Network.HTTP.Types.Header qualified as NH
    14 22   
     23 +-- | Compressed encodings which hasura supports
    15 24  data CompressionType
    16 25   = CTGZip
    17  - deriving (Show, Eq)
     26 + deriving (Show, Eq, Ord)
     27 + 
     28 +-- | Accept-Encoding directives (from client) which hasura supports. @Nothing@
     29 +-- indicates identity (no compression)
     30 +type EncodingType = Maybe CompressionType
     31 + 
     32 +identityEncoding :: EncodingType
     33 +identityEncoding = Nothing
    18 34   
    19 35  compressionTypeToTxt :: CompressionType -> Text
    20 36  compressionTypeToTxt CTGZip = "gzip"
    21 37   
     38 +-- | A map from Accept-Encoding directives to corresponding Content-Encoding
     39 +-- headers (from server). NOTE: @identity@ is not a valid directive for this
     40 +-- header.
     41 +contentEncodingHeader :: CompressionType -> NH.Header
     42 +contentEncodingHeader CTGZip = gzipHeader
     43 + 
     44 +-- | Maybe compress the response body, based on the client's Accept-Encoding
     45 +-- and our own judgement.
    22 46  compressResponse ::
    23 47   NH.RequestHeaders ->
    24 48   BL.ByteString ->
    25  - (BL.ByteString, Maybe NH.Header, Maybe CompressionType)
    26  -compressResponse reqHeaders unCompressedResp =
    27  - let compressionTypeM = getRequestedCompression reqHeaders
    28  - appendCompressionType (res, headerM) = (res, headerM, compressionTypeM)
    29  - gzipCompressionParams =
    30  - GZ.defaultCompressParams {GZ.compressLevel = GZ.compressionLevel 1}
    31  - in appendCompressionType $ case compressionTypeM of
    32  - Just CTGZip -> (GZ.compressWith gzipCompressionParams unCompressedResp, Just gzipHeader)
    33  - Nothing -> (unCompressedResp, Nothing)
     49 + -- | The response body (possibly compressed), and the encoding chosen
     50 + (BL.ByteString, EncodingType)
     51 +compressResponse reqHeaders unCompressedResp
     52 + -- we have option to gzip:
     53 + | acceptedEncodings == Set.fromList [identityEncoding, Just CTGZip]
     54 + ||
     55 + -- we must gzip:
     56 + acceptedEncodings == Set.fromList [Just CTGZip] =
     57 + (compressFast CTGZip unCompressedResp, Just CTGZip)
     58 + -- we must only return an uncompressed response:
     59 + | acceptedEncodings == Set.fromList [identityEncoding] =
     60 + (unCompressedResp, identityEncoding)
     61 + -- this is technically a client error, but ignore for now (maintaining
     62 + -- current behavior); assume identity:
     63 + | otherwise =
     64 + (unCompressedResp, identityEncoding)
     65 + where
     66 + acceptedEncodings = getAcceptedEncodings reqHeaders
    34 67   
    35  -getRequestedCompression :: NH.RequestHeaders -> Maybe CompressionType
    36  -getRequestedCompression reqHeaders
    37  - | "gzip" `elem` acceptEncodingVals = Just CTGZip
    38  - | otherwise = Nothing
     68 +-- | Compress using
     69 +compressFast :: CompressionType -> BL.ByteString -> BL.ByteString
     70 +compressFast = \case
     71 + CTGZip -> GZ.compressWith gzipCompressionParams
    39 72   where
    40  - acceptEncodingVals =
     73 + gzipCompressionParams =
     74 + -- See Note [Compression ratios]
     75 + GZ.defaultCompressParams {GZ.compressLevel = GZ.compressionLevel 1}
     76 + 
     77 +-- | Which encodings can the client accept? The empty set returned here is an
     78 +-- error condition and the server tecnically ought to return a 406.
     79 +--
     80 +-- https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
     81 +getAcceptedEncodings :: NH.RequestHeaders -> Set.Set EncodingType
     82 +getAcceptedEncodings reqHeaders = Set.fromList acceptedEncodingTypes
     83 + where
     84 + rawHeaderVals =
    41 85   concatMap (splitHeaderVal . snd) $
    42 86   filter (\h -> fst h == NH.hAcceptEncoding) reqHeaders
    43 87   splitHeaderVal bs = map T.strip $ T.splitOn "," $ bsToTxt bs
     88 + -- we'll ignore qvalues, except (crucially) to determine if 'identity' is rejected:
     89 + identityRejected =
     90 + -- ...if we're explicitly rejecting identity, or...
     91 + "identity;q=0" `elem` rawHeaderVals
     92 + ||
     93 + -- ...rejecting anything not listed and identity is not listed
     94 + ( "*;q=0" `elem` rawHeaderVals
     95 + && (not $ any ("identity" `T.isPrefixOf`) rawHeaderVals)
     96 + )
     97 + gzipAccepted =
     98 + any ("gzip" `T.isPrefixOf`) rawHeaderVals
     99 + && ("gzip;q=0" `notElem` rawHeaderVals)
     100 + -- AFAICT missing header, or *, implies “send whatever you want”
     101 + -- https://www.rfc-editor.org/rfc/rfc7231#section-5.3.4
     102 + anyEncodingTechnicallyAcceptable =
     103 + null rawHeaderVals || rawHeaderVals == ["*"]
     104 + acceptedEncodingTypes
     105 + -- \| anyEncodingTechnicallyAcceptable = [Just CTGZip, identityEncoding]
     106 + -- NOTE!: For now to be conservative and maintain historical behavior we
     107 + -- will treat this case as “only identity is acceptable”:
     108 + | anyEncodingTechnicallyAcceptable = [identityEncoding]
     109 + | otherwise =
     110 + (guard gzipAccepted $> Just CTGZip)
     111 + <> (guard (not identityRejected) $> identityEncoding)
     112 + 
     113 +{-
     114 +Note [Compression ratios]
     115 +~~~~~~~~~~~~~~~~~~~~~~~~~
     116 + 
     117 +I did some measurements of compression ratios at `gzip -1` (libc) of some
     118 +randomly generated json, real json datasets, and output from our benchmarked
     119 +chinook queries:
     120 + 
     121 + 2552/6131 = 0.41
     122 + 4666/8718 = 0.53
     123 + 13921/27131 = 0.51
     124 + 5895/8879 = 0.66 <----- completely random strings
     125 + 8634/28261 = 0.30
     126 + 70422/372466 = 0.18
     127 + 
     128 + 200/600 = 0.33 <----| from chinook graphql benchmarks
     129 + 3000/33000 = 0.09 <----|
     130 + 13000/190000 = 0.07 <----'
     131 + 
     132 +Given these numbers I would suggest using a rule-of-thumb expected compression
     133 +ratio between 2:1 and 10:1, depending on what being conservative means in the
     134 +context.
     135 + 
     136 +I didn't test higher compression levels much, but `gzip -4` for the most part
     137 +resulted in less than 10% smaller output on random json, and ~30% on our highly
     138 +compressible benchmark output.
     139 + 
     140 +UPDATE (12/5):
     141 +~~~~~~~~~~~~~
     142 + 
     143 +Some recent data on compression ratios for graphql responsed (here as:
     144 +compressed_size / uncompressed_size) taken from cloud:
     145 + 
     146 +Aggregate across all responses where uncompressed > 700 bytes:
     147 + 
     148 + max: 0.891 (worst compression)
     149 + p99: 0.658
     150 + p95: 0.565
     151 + p75: 0.467
     152 + p50: 0.346
     153 + min: 0.005 (best compression)
     154 + 
     155 +Aggregate across responses where uncompressed > 17K bytes (90th percentile):
     156 + 
     157 + max: 0.773
     158 + p99: 0.414
     159 + p95: 0.304
     160 + p75: 0.202
     161 + p50: 0.172
     162 + min: 0.005
     163 + 
     164 +-}
    44 165   
  • ■ ■ ■ ■ ■ ■
    server/src-test/Hasura/Server/CompressionSpec.hs
     1 +module Hasura.Server.CompressionSpec (spec) where
     2 + 
     3 +import Data.Set qualified as Set
     4 +import Hasura.Prelude
     5 +import Hasura.Server.Compression
     6 +import Test.Hspec
     7 + 
     8 +spec :: Spec
     9 +spec = describe "serialized data compression" $ parallel do
     10 + describe "getAcceptedEncodings" do
     11 + it "detects gzip and not" do
     12 + getAcceptedEncodings [("x", "x"), ("accept-encoding", "gzip")]
     13 + `shouldBe` Set.fromList [Just CTGZip, identityEncoding]
     14 + 
     15 + getAcceptedEncodings [("accept-encoding", "brotli, gzip;q=0.9")]
     16 + `shouldBe` Set.fromList [Just CTGZip, identityEncoding]
     17 + 
     18 + getAcceptedEncodings [("accept-encoding", "brotli")]
     19 + `shouldBe` Set.fromList [identityEncoding]
     20 + 
     21 + getAcceptedEncodings [("accept-encoding", "identity;q=0.42,brotli, gzip;q=0.9")]
     22 + `shouldBe` Set.fromList [Just CTGZip, identityEncoding]
     23 + 
     24 + getAcceptedEncodings [("accept-encoding", "identity;q=0.42,brotli, gzip;q=0")]
     25 + `shouldBe` Set.fromList [identityEncoding]
     26 + 
     27 + it "handles explicit rejection of identity" do
     28 + getAcceptedEncodings [("accept-encoding", "identity;q=0,brotli, gzip;q=0.9")]
     29 + `shouldBe` Set.fromList [Just CTGZip]
     30 + 
     31 + -- strictly per spec this would result in a 406, but we'll likely
     32 + -- just decide to return uncompressed (identity) higher up
     33 + getAcceptedEncodings [("accept-encoding", "identity;q=0,brotli")]
     34 + `shouldBe` Set.fromList []
     35 + getAcceptedEncodings [("accept-encoding", "*;q=0,brotli")]
     36 + `shouldBe` Set.fromList []
     37 + 
     38 + getAcceptedEncodings [("accept-encoding", "gzip, *;q=0")]
     39 + `shouldBe` Set.fromList [Just CTGZip]
     40 + 
     41 + -- behaviors that might change if we decide it's worth it:
     42 + it "arbitrary/historical behavior" do
     43 + -- see Compression.hs for discussion
     44 + getAcceptedEncodings [("accept-encoding", "*")]
     45 + `shouldBe` Set.fromList [identityEncoding]
     46 + getAcceptedEncodings []
     47 + `shouldBe` Set.fromList [identityEncoding]
     48 + getAcceptedEncodings [("accept-encoding", "")]
     49 + `shouldBe` Set.fromList [identityEncoding]
     50 + 
Please wait...
Page is in error, reload to recover