refractored Uu encoding, so it is more correct, added tests for Uu encode

This commit is contained in:
Stefan Friese 2024-06-09 20:56:00 +02:00
parent 6ab4cbaa79
commit 37985bd205
2 changed files with 131 additions and 32 deletions

View File

@ -1,30 +1,115 @@
-- module Encoding.UnixToUnix
-- ( encuu
-- , decuu
-- ) where
-- import Data.ByteString as B
-- import qualified Data.ByteString.Char8 as BC
-- import qualified Codec.Binary.Uu as UU
-- import Data.ByteString.UTF8 as BSU -- from utf8-string
-- import qualified Data.Text as T
-- import qualified Data.Text.Encoding as T
-- import qualified Data.Text.IO as T
-- -- decuu :: String -> String
-- -- -- decuu = C.unpack . U.fromRight . UU.decode . BSU.fromString
-- -- decuu input =
-- -- case UU.decode (T.encodeUtf8 (T.pack input)) of
-- -- Right decoded -> T.unpack (T.decodeUtf8 decoded)
-- -- Left _ -> "Error decoding UU.\n"
-- decuu :: B.ByteString -> B.ByteString
-- decuu input = case UU.decode input of
-- Right byteString -> byteString
-- Left _ -> BC.pack "Error: Invalid Unix to Unix encoding input"
-- -- encuu :: String -> String
-- -- -- encuu = C.unpack . UU.encode . BSU.fromString
-- -- encuu = T.unpack . T.decodeUtf8 . UU.encode . T.encodeUtf8 . T.pack
-- encuu :: B.ByteString -> B.ByteString
-- encuu = UU.encode
module Encoding.UnixToUnix
( encuu
, decuu
) where
import Data.ByteString as B
import Data.Bits (shiftL, shiftR, (.&.), (.|.))
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Codec.Binary.Uu as UU
import Data.ByteString.UTF8 as BSU -- from utf8-string
import Data.Char (chr, ord)
import Data.Word (Word8)
import Data.List (unfoldr)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.IO as T
uuAlphabet :: String
uuAlphabet = " !" ++ ['"'..'~']
-- decuu :: String -> String
-- -- decuu = C.unpack . U.fromRight . UU.decode . BSU.fromString
-- decuu input =
-- case UU.decode (T.encodeUtf8 (T.pack input)) of
-- Right decoded -> T.unpack (T.decodeUtf8 decoded)
-- Left _ -> "Error decoding UU.\n"
decuu :: B.ByteString -> B.ByteString
decuu input = case UU.decode input of
Right byteString -> byteString
Left _ -> BC.pack "Error: Invalid Unix to Unix encoding input"
-- Convert a 6-bit integer to a UUEncode character
toUUChar :: Int -> Char
toUUChar n = uuAlphabet !! n
-- encuu :: String -> String
-- -- encuu = C.unpack . UU.encode . BSU.fromString
-- encuu = T.unpack . T.decodeUtf8 . UU.encode . T.encodeUtf8 . T.pack
encuu :: B.ByteString -> B.ByteString
encuu = UU.encode
-- Convert a UUEncode character to a 6-bit integer
fromUUChar :: Char -> Int
fromUUChar c = maybe (error "Invalid UUEncode character") id (lookup c (zip uuAlphabet [0..]))
encuu :: ByteString -> ByteString
encuu bs = BC.concat . map encodeLine . chunkBy 45 $ B.unpack bs
where
encodeLine :: [Word8] -> ByteString
encodeLine line =
let lenChar = toUUChar (length line)
encodedData = concatMap (map toUUChar . encodeChunk) (chunkBy 3 line)
in BC.pack (lenChar : encodedData)
encodeChunk :: [Word8] -> [Int]
encodeChunk [x1, x2, x3] = [ fromIntegral ((x1 `shiftR` 2) .&. 0x3F)
, fromIntegral (((x1 `shiftL` 4) .&. 0x30) .|. ((x2 `shiftR` 4) .&. 0x0F))
, fromIntegral (((x2 `shiftL` 2) .&. 0x3C) .|. ((x3 `shiftR` 6) .&. 0x03))
, fromIntegral (x3 .&. 0x3F)
]
encodeChunk [x1, x2] = [ fromIntegral ((x1 `shiftR` 2) .&. 0x3F)
, fromIntegral (((x1 `shiftL` 4) .&. 0x30) .|. ((x2 `shiftR` 4) .&. 0x0F))
, fromIntegral ((x2 `shiftL` 2) .&. 0x3C)
]
encodeChunk [x1] = [ fromIntegral ((x1 `shiftR` 2) .&. 0x3F)
, fromIntegral ((x1 `shiftL` 4) .&. 0x30)
]
encodeChunk _ = error "Invalid chunk length"
decuu :: ByteString -> ByteString
decuu = B.concat . map decodeLine . BC.lines
where
decodeLine :: ByteString -> ByteString
decodeLine line =
let lenChar = BC.head line
len = fromUUChar lenChar
encodedData = BC.unpack (BC.tail line)
in B.pack $ take len (concatMap decodeChunk (chunkBy 4 encodedData))
decodeChunk :: [Char] -> [Word8]
decodeChunk [c1, c2, c3, c4] =
let n1 = fromUUChar c1
n2 = fromUUChar c2
n3 = fromUUChar c3
n4 = fromUUChar c4
in [ fromIntegral ((n1 `shiftL` 2) .|. (n2 `shiftR` 4))
, fromIntegral ((n2 `shiftL` 4) .|. (n3 `shiftR` 2))
, fromIntegral ((n3 `shiftL` 6) .|. n4)
]
decodeChunk [c1, c2, c3] =
let n1 = fromUUChar c1
n2 = fromUUChar c2
n3 = fromUUChar c3
in [ fromIntegral ((n1 `shiftL` 2) .|. (n2 `shiftR` 4))
, fromIntegral ((n2 `shiftL` 4) .|. (n3 `shiftR` 2))
]
decodeChunk [c1, c2] =
let n1 = fromUUChar c1
n2 = fromUUChar c2
in [ fromIntegral ((n1 `shiftL` 2) .|. (n2 `shiftR` 4))]
decodeChunk _ = error "Invalid encoded chunk length"
chunkBy :: Int -> [a] -> [[a]]
chunkBy _ [] = []
chunkBy n xs = take n xs : chunkBy n (drop n xs)

View File

@ -149,19 +149,31 @@ testDecQp = TestCase $ do
assertEqual "for (decqp \"QP=20works=20by=20using=20the=20equals=20sign=20=3D=20as=20an=20escape=20=\r\ncharacter.=20It=20also=20limits=20line=20length=20to=2076,=20as=20some=20=\r\nsoftware=20has=20limits=20on=20line=20length.\")," (BSU.fromString "QP works by using the equals sign = as an escape character. It also limits line length to 76, as some software has limits on line length.") (decqp $ BSU.fromString "QP=20works=20by=20using=20the=20equals=20sign=20=3D=20as=20an=20escape=20=\r\ncharacter.=20It=20also=20limits=20line=20length=20to=2076,=20as=20some=20=\r\nsoftware=20has=20limits=20on=20line=20length.")
assertEqual "for (decqp \"=F0=9F=98=82\")," emojiBS (decqp $ BSU.fromString "=F0=9F=98=82")
-- testEnUu :: Test
-- testEnUu = TestCase $ do
-- assertEqual "for (encuu \"Hello, World!\")," "110 145 154 154 157 54 40 127 157 162 154 144 41" (encuu "Hello, World!")
-- assertEqual "for (encuu \"Haskell\")," "110 141 163 153 145 154 154" (encuu "Haskell")
-- assertEqual "for (encuu \"😂\")," "360 237 230 202" (encuu "😂")
testEncUu :: Test
testEncUu = TestCase $ do
assertEqual "for (encuu \"Hello, World!\")," (BSU.fromString "-2&5L;&\\L(%=O<FQD(0") (encuu helloWorldBS)
assertEqual "for (encuu \"Haskell\")," (BSU.fromString "'2&%S:V5L; ") (encuu haskellBS)
assertEqual "for (encuu \"😂\")," (BSU.fromString "$\\)^8@@") (encuu emojiBS)
-- testDecUu :: Test
-- testDecUu = TestCase $ do
-- assertEqual "for (decuu \"110 145 154 154 157 54 40 127 157 162 154 144 41\")," "Hello, World!" (decuu "110 145 154 154 157 54 40 127 157 162 154 144 41")
-- assertEqual "for (decuu \"110 141 163 153 145 154 154\")," "Haskell" (decuu "110 141 163 153 145 154 154")
-- assertEqual "for (decuu \"360 237 230 202\")," "😂" (decuu "360 237 230 202")
testDecUu :: Test
testDecUu = TestCase $ do
assertEqual "for (decuu \"-2&5L;&\\L(%=O<FQD(0\")," helloWorldBS (decuu $ BSU.fromString "-2&5L;&\\L(%=O<FQD(0")
assertEqual "for (decuu \"'2&%S:V5L;\")," haskellBS (decuu $ BSU.fromString "'2&%S:V5L; ")
assertEqual "for (decuu \"$\\)^8@@\")," (BSU.fromString "😂") (decuu $ BSU.fromString "$\\)^8@@")
-- testEncXx :: Test
-- testEncXx = TestCase $ do
-- assertEqual "for (encxx \"Hello, World!\")," (BSU.fromString "BG4JgP4wg63RjQalY6E") (encxx helloWorldBS)
-- assertEqual "for (encxx \"Haskell\")," (BSU.fromString "'2&%S:V5L; ") (encxx haskellBS)
-- assertEqual "for (encxx \"😂\")," (BSU.fromString "$\\)^8@@") (encxx emojiBS)
-- testDecXx :: Test
-- testDecXx = TestCase $ do
-- assertEqual "for (decxx \"-2&5L;&\\L(%=O<FQD(0\")," helloWorldBS (decxx $ BSU.fromString "-2&5L;&\\L(%=O<FQD(0")
-- assertEqual "for (decxx \"'2&%S:V5L;\")," haskellBS (decxx $ BSU.fromString "'2&%S:V5L; ")
-- assertEqual "for (decxx \"$\\)^8@@\")," (BSU.fromString "😂") (decxx $ BSU.fromString "$\\)^8@@")
tests :: Test
tests = TestList [TestLabel "Test enc91" testEnc91,
TestLabel "Test dec91" testDec91,
@ -182,7 +194,9 @@ tests = TestList [TestLabel "Test enc91" testEnc91,
TestLabel "Test dec2 " testDec2 ,
TestLabel "Test enc2 " testEnc2 ,
TestLabel "Test decqp" testDecQp,
TestLabel "Test encqp" testEncQp]
TestLabel "Test encqp" testEncQp,
TestLabel "Test decuu" testDecUu,
TestLabel "Test encuu" testEncUu]
-- main :: IO Counts