From 37985bd2051d71601c9b6bd8fea06efae9739315 Mon Sep 17 00:00:00 2001 From: Stefan Friese Date: Sun, 9 Jun 2024 20:56:00 +0200 Subject: [PATCH] refractored Uu encoding, so it is more correct, added tests for Uu encode --- src/Encoding/UnixToUnix.hs | 127 +++++++++++++++++++++++++++++++------ test/Main.hs | 36 +++++++---- 2 files changed, 131 insertions(+), 32 deletions(-) diff --git a/src/Encoding/UnixToUnix.hs b/src/Encoding/UnixToUnix.hs index 761d27c..376915c 100644 --- a/src/Encoding/UnixToUnix.hs +++ b/src/Encoding/UnixToUnix.hs @@ -1,30 +1,115 @@ +-- module Encoding.UnixToUnix +-- ( encuu +-- , decuu +-- ) where + +-- import Data.ByteString as B +-- import qualified Data.ByteString.Char8 as BC +-- import qualified Codec.Binary.Uu as UU +-- import Data.ByteString.UTF8 as BSU -- from utf8-string + +-- import qualified Data.Text as T +-- import qualified Data.Text.Encoding as T +-- import qualified Data.Text.IO as T + +-- -- decuu :: String -> String +-- -- -- decuu = C.unpack . U.fromRight . UU.decode . BSU.fromString +-- -- decuu input = +-- -- case UU.decode (T.encodeUtf8 (T.pack input)) of +-- -- Right decoded -> T.unpack (T.decodeUtf8 decoded) +-- -- Left _ -> "Error decoding UU.\n" +-- decuu :: B.ByteString -> B.ByteString +-- decuu input = case UU.decode input of +-- Right byteString -> byteString +-- Left _ -> BC.pack "Error: Invalid Unix to Unix encoding input" + +-- -- encuu :: String -> String +-- -- -- encuu = C.unpack . UU.encode . BSU.fromString +-- -- encuu = T.unpack . T.decodeUtf8 . UU.encode . T.encodeUtf8 . T.pack +-- encuu :: B.ByteString -> B.ByteString +-- encuu = UU.encode + module Encoding.UnixToUnix ( encuu , decuu ) where -import Data.ByteString as B +import Data.Bits (shiftL, shiftR, (.&.), (.|.)) +import Data.ByteString (ByteString) +import qualified Data.ByteString as B import qualified Data.ByteString.Char8 as BC -import qualified Codec.Binary.Uu as UU -import Data.ByteString.UTF8 as BSU -- from utf8-string +import Data.Char (chr, ord) +import Data.Word (Word8) +import Data.List (unfoldr) -import qualified Data.Text as T -import qualified Data.Text.Encoding as T -import qualified Data.Text.IO as T +uuAlphabet :: String +uuAlphabet = " !" ++ ['"'..'~'] --- decuu :: String -> String --- -- decuu = C.unpack . U.fromRight . UU.decode . BSU.fromString --- decuu input = --- case UU.decode (T.encodeUtf8 (T.pack input)) of --- Right decoded -> T.unpack (T.decodeUtf8 decoded) --- Left _ -> "Error decoding UU.\n" -decuu :: B.ByteString -> B.ByteString -decuu input = case UU.decode input of - Right byteString -> byteString - Left _ -> BC.pack "Error: Invalid Unix to Unix encoding input" +-- Convert a 6-bit integer to a UUEncode character +toUUChar :: Int -> Char +toUUChar n = uuAlphabet !! n --- encuu :: String -> String --- -- encuu = C.unpack . UU.encode . BSU.fromString --- encuu = T.unpack . T.decodeUtf8 . UU.encode . T.encodeUtf8 . T.pack -encuu :: B.ByteString -> B.ByteString -encuu = UU.encode +-- Convert a UUEncode character to a 6-bit integer +fromUUChar :: Char -> Int +fromUUChar c = maybe (error "Invalid UUEncode character") id (lookup c (zip uuAlphabet [0..])) + +encuu :: ByteString -> ByteString +encuu bs = BC.concat . map encodeLine . chunkBy 45 $ B.unpack bs + where + encodeLine :: [Word8] -> ByteString + encodeLine line = + let lenChar = toUUChar (length line) + encodedData = concatMap (map toUUChar . encodeChunk) (chunkBy 3 line) + in BC.pack (lenChar : encodedData) + + encodeChunk :: [Word8] -> [Int] + encodeChunk [x1, x2, x3] = [ fromIntegral ((x1 `shiftR` 2) .&. 0x3F) + , fromIntegral (((x1 `shiftL` 4) .&. 0x30) .|. ((x2 `shiftR` 4) .&. 0x0F)) + , fromIntegral (((x2 `shiftL` 2) .&. 0x3C) .|. ((x3 `shiftR` 6) .&. 0x03)) + , fromIntegral (x3 .&. 0x3F) + ] + encodeChunk [x1, x2] = [ fromIntegral ((x1 `shiftR` 2) .&. 0x3F) + , fromIntegral (((x1 `shiftL` 4) .&. 0x30) .|. ((x2 `shiftR` 4) .&. 0x0F)) + , fromIntegral ((x2 `shiftL` 2) .&. 0x3C) + ] + encodeChunk [x1] = [ fromIntegral ((x1 `shiftR` 2) .&. 0x3F) + , fromIntegral ((x1 `shiftL` 4) .&. 0x30) + ] + encodeChunk _ = error "Invalid chunk length" + +decuu :: ByteString -> ByteString +decuu = B.concat . map decodeLine . BC.lines + where + decodeLine :: ByteString -> ByteString + decodeLine line = + let lenChar = BC.head line + len = fromUUChar lenChar + encodedData = BC.unpack (BC.tail line) + in B.pack $ take len (concatMap decodeChunk (chunkBy 4 encodedData)) + + decodeChunk :: [Char] -> [Word8] + decodeChunk [c1, c2, c3, c4] = + let n1 = fromUUChar c1 + n2 = fromUUChar c2 + n3 = fromUUChar c3 + n4 = fromUUChar c4 + in [ fromIntegral ((n1 `shiftL` 2) .|. (n2 `shiftR` 4)) + , fromIntegral ((n2 `shiftL` 4) .|. (n3 `shiftR` 2)) + , fromIntegral ((n3 `shiftL` 6) .|. n4) + ] + decodeChunk [c1, c2, c3] = + let n1 = fromUUChar c1 + n2 = fromUUChar c2 + n3 = fromUUChar c3 + in [ fromIntegral ((n1 `shiftL` 2) .|. (n2 `shiftR` 4)) + , fromIntegral ((n2 `shiftL` 4) .|. (n3 `shiftR` 2)) + ] + decodeChunk [c1, c2] = + let n1 = fromUUChar c1 + n2 = fromUUChar c2 + in [ fromIntegral ((n1 `shiftL` 2) .|. (n2 `shiftR` 4))] + decodeChunk _ = error "Invalid encoded chunk length" + +chunkBy :: Int -> [a] -> [[a]] +chunkBy _ [] = [] +chunkBy n xs = take n xs : chunkBy n (drop n xs) diff --git a/test/Main.hs b/test/Main.hs index 3a3ec69..1b7806e 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -149,19 +149,31 @@ testDecQp = TestCase $ do assertEqual "for (decqp \"QP=20works=20by=20using=20the=20equals=20sign=20=3D=20as=20an=20escape=20=\r\ncharacter.=20It=20also=20limits=20line=20length=20to=2076,=20as=20some=20=\r\nsoftware=20has=20limits=20on=20line=20length.\")," (BSU.fromString "QP works by using the equals sign = as an escape character. It also limits line length to 76, as some software has limits on line length.") (decqp $ BSU.fromString "QP=20works=20by=20using=20the=20equals=20sign=20=3D=20as=20an=20escape=20=\r\ncharacter.=20It=20also=20limits=20line=20length=20to=2076,=20as=20some=20=\r\nsoftware=20has=20limits=20on=20line=20length.") assertEqual "for (decqp \"=F0=9F=98=82\")," emojiBS (decqp $ BSU.fromString "=F0=9F=98=82") --- testEnUu :: Test --- testEnUu = TestCase $ do --- assertEqual "for (encuu \"Hello, World!\")," "110 145 154 154 157 54 40 127 157 162 154 144 41" (encuu "Hello, World!") --- assertEqual "for (encuu \"Haskell\")," "110 141 163 153 145 154 154" (encuu "Haskell") --- assertEqual "for (encuu \"😂\")," "360 237 230 202" (encuu "😂") +testEncUu :: Test +testEncUu = TestCase $ do + assertEqual "for (encuu \"Hello, World!\")," (BSU.fromString "-2&5L;&\\L(%=O