refractored Uu encoding, so it is more correct, added tests for Uu encode

This commit is contained in:
Stefan Friese 2024-06-09 20:56:00 +02:00
parent 6ab4cbaa79
commit 37985bd205
2 changed files with 131 additions and 32 deletions

View File

@ -1,30 +1,115 @@
-- module Encoding.UnixToUnix
-- ( encuu
-- , decuu
-- ) where
-- import Data.ByteString as B
-- import qualified Data.ByteString.Char8 as BC
-- import qualified Codec.Binary.Uu as UU
-- import Data.ByteString.UTF8 as BSU -- from utf8-string
-- import qualified Data.Text as T
-- import qualified Data.Text.Encoding as T
-- import qualified Data.Text.IO as T
-- -- decuu :: String -> String
-- -- -- decuu = C.unpack . U.fromRight . UU.decode . BSU.fromString
-- -- decuu input =
-- -- case UU.decode (T.encodeUtf8 (T.pack input)) of
-- -- Right decoded -> T.unpack (T.decodeUtf8 decoded)
-- -- Left _ -> "Error decoding UU.\n"
-- decuu :: B.ByteString -> B.ByteString
-- decuu input = case UU.decode input of
-- Right byteString -> byteString
-- Left _ -> BC.pack "Error: Invalid Unix to Unix encoding input"
-- -- encuu :: String -> String
-- -- -- encuu = C.unpack . UU.encode . BSU.fromString
-- -- encuu = T.unpack . T.decodeUtf8 . UU.encode . T.encodeUtf8 . T.pack
-- encuu :: B.ByteString -> B.ByteString
-- encuu = UU.encode
module Encoding.UnixToUnix module Encoding.UnixToUnix
( encuu ( encuu
, decuu , decuu
) where ) where
import Data.ByteString as B import Data.Bits (shiftL, shiftR, (.&.), (.|.))
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC import qualified Data.ByteString.Char8 as BC
import qualified Codec.Binary.Uu as UU import Data.Char (chr, ord)
import Data.ByteString.UTF8 as BSU -- from utf8-string import Data.Word (Word8)
import Data.List (unfoldr)
import qualified Data.Text as T uuAlphabet :: String
import qualified Data.Text.Encoding as T uuAlphabet = " !" ++ ['"'..'~']
import qualified Data.Text.IO as T
-- decuu :: String -> String -- Convert a 6-bit integer to a UUEncode character
-- -- decuu = C.unpack . U.fromRight . UU.decode . BSU.fromString toUUChar :: Int -> Char
-- decuu input = toUUChar n = uuAlphabet !! n
-- case UU.decode (T.encodeUtf8 (T.pack input)) of
-- Right decoded -> T.unpack (T.decodeUtf8 decoded)
-- Left _ -> "Error decoding UU.\n"
decuu :: B.ByteString -> B.ByteString
decuu input = case UU.decode input of
Right byteString -> byteString
Left _ -> BC.pack "Error: Invalid Unix to Unix encoding input"
-- encuu :: String -> String -- Convert a UUEncode character to a 6-bit integer
-- -- encuu = C.unpack . UU.encode . BSU.fromString fromUUChar :: Char -> Int
-- encuu = T.unpack . T.decodeUtf8 . UU.encode . T.encodeUtf8 . T.pack fromUUChar c = maybe (error "Invalid UUEncode character") id (lookup c (zip uuAlphabet [0..]))
encuu :: B.ByteString -> B.ByteString
encuu = UU.encode encuu :: ByteString -> ByteString
encuu bs = BC.concat . map encodeLine . chunkBy 45 $ B.unpack bs
where
encodeLine :: [Word8] -> ByteString
encodeLine line =
let lenChar = toUUChar (length line)
encodedData = concatMap (map toUUChar . encodeChunk) (chunkBy 3 line)
in BC.pack (lenChar : encodedData)
encodeChunk :: [Word8] -> [Int]
encodeChunk [x1, x2, x3] = [ fromIntegral ((x1 `shiftR` 2) .&. 0x3F)
, fromIntegral (((x1 `shiftL` 4) .&. 0x30) .|. ((x2 `shiftR` 4) .&. 0x0F))
, fromIntegral (((x2 `shiftL` 2) .&. 0x3C) .|. ((x3 `shiftR` 6) .&. 0x03))
, fromIntegral (x3 .&. 0x3F)
]
encodeChunk [x1, x2] = [ fromIntegral ((x1 `shiftR` 2) .&. 0x3F)
, fromIntegral (((x1 `shiftL` 4) .&. 0x30) .|. ((x2 `shiftR` 4) .&. 0x0F))
, fromIntegral ((x2 `shiftL` 2) .&. 0x3C)
]
encodeChunk [x1] = [ fromIntegral ((x1 `shiftR` 2) .&. 0x3F)
, fromIntegral ((x1 `shiftL` 4) .&. 0x30)
]
encodeChunk _ = error "Invalid chunk length"
decuu :: ByteString -> ByteString
decuu = B.concat . map decodeLine . BC.lines
where
decodeLine :: ByteString -> ByteString
decodeLine line =
let lenChar = BC.head line
len = fromUUChar lenChar
encodedData = BC.unpack (BC.tail line)
in B.pack $ take len (concatMap decodeChunk (chunkBy 4 encodedData))
decodeChunk :: [Char] -> [Word8]
decodeChunk [c1, c2, c3, c4] =
let n1 = fromUUChar c1
n2 = fromUUChar c2
n3 = fromUUChar c3
n4 = fromUUChar c4
in [ fromIntegral ((n1 `shiftL` 2) .|. (n2 `shiftR` 4))
, fromIntegral ((n2 `shiftL` 4) .|. (n3 `shiftR` 2))
, fromIntegral ((n3 `shiftL` 6) .|. n4)
]
decodeChunk [c1, c2, c3] =
let n1 = fromUUChar c1
n2 = fromUUChar c2
n3 = fromUUChar c3
in [ fromIntegral ((n1 `shiftL` 2) .|. (n2 `shiftR` 4))
, fromIntegral ((n2 `shiftL` 4) .|. (n3 `shiftR` 2))
]
decodeChunk [c1, c2] =
let n1 = fromUUChar c1
n2 = fromUUChar c2
in [ fromIntegral ((n1 `shiftL` 2) .|. (n2 `shiftR` 4))]
decodeChunk _ = error "Invalid encoded chunk length"
chunkBy :: Int -> [a] -> [[a]]
chunkBy _ [] = []
chunkBy n xs = take n xs : chunkBy n (drop n xs)

View File

@ -149,19 +149,31 @@ testDecQp = TestCase $ do
assertEqual "for (decqp \"QP=20works=20by=20using=20the=20equals=20sign=20=3D=20as=20an=20escape=20=\r\ncharacter.=20It=20also=20limits=20line=20length=20to=2076,=20as=20some=20=\r\nsoftware=20has=20limits=20on=20line=20length.\")," (BSU.fromString "QP works by using the equals sign = as an escape character. It also limits line length to 76, as some software has limits on line length.") (decqp $ BSU.fromString "QP=20works=20by=20using=20the=20equals=20sign=20=3D=20as=20an=20escape=20=\r\ncharacter.=20It=20also=20limits=20line=20length=20to=2076,=20as=20some=20=\r\nsoftware=20has=20limits=20on=20line=20length.") assertEqual "for (decqp \"QP=20works=20by=20using=20the=20equals=20sign=20=3D=20as=20an=20escape=20=\r\ncharacter.=20It=20also=20limits=20line=20length=20to=2076,=20as=20some=20=\r\nsoftware=20has=20limits=20on=20line=20length.\")," (BSU.fromString "QP works by using the equals sign = as an escape character. It also limits line length to 76, as some software has limits on line length.") (decqp $ BSU.fromString "QP=20works=20by=20using=20the=20equals=20sign=20=3D=20as=20an=20escape=20=\r\ncharacter.=20It=20also=20limits=20line=20length=20to=2076,=20as=20some=20=\r\nsoftware=20has=20limits=20on=20line=20length.")
assertEqual "for (decqp \"=F0=9F=98=82\")," emojiBS (decqp $ BSU.fromString "=F0=9F=98=82") assertEqual "for (decqp \"=F0=9F=98=82\")," emojiBS (decqp $ BSU.fromString "=F0=9F=98=82")
-- testEnUu :: Test testEncUu :: Test
-- testEnUu = TestCase $ do testEncUu = TestCase $ do
-- assertEqual "for (encuu \"Hello, World!\")," "110 145 154 154 157 54 40 127 157 162 154 144 41" (encuu "Hello, World!") assertEqual "for (encuu \"Hello, World!\")," (BSU.fromString "-2&5L;&\\L(%=O<FQD(0") (encuu helloWorldBS)
-- assertEqual "for (encuu \"Haskell\")," "110 141 163 153 145 154 154" (encuu "Haskell") assertEqual "for (encuu \"Haskell\")," (BSU.fromString "'2&%S:V5L; ") (encuu haskellBS)
-- assertEqual "for (encuu \"😂\")," "360 237 230 202" (encuu "😂") assertEqual "for (encuu \"😂\")," (BSU.fromString "$\\)^8@@") (encuu emojiBS)
-- testDecUu :: Test testDecUu :: Test
-- testDecUu = TestCase $ do testDecUu = TestCase $ do
-- assertEqual "for (decuu \"110 145 154 154 157 54 40 127 157 162 154 144 41\")," "Hello, World!" (decuu "110 145 154 154 157 54 40 127 157 162 154 144 41") assertEqual "for (decuu \"-2&5L;&\\L(%=O<FQD(0\")," helloWorldBS (decuu $ BSU.fromString "-2&5L;&\\L(%=O<FQD(0")
-- assertEqual "for (decuu \"110 141 163 153 145 154 154\")," "Haskell" (decuu "110 141 163 153 145 154 154") assertEqual "for (decuu \"'2&%S:V5L;\")," haskellBS (decuu $ BSU.fromString "'2&%S:V5L; ")
-- assertEqual "for (decuu \"360 237 230 202\")," "😂" (decuu "360 237 230 202") assertEqual "for (decuu \"$\\)^8@@\")," (BSU.fromString "😂") (decuu $ BSU.fromString "$\\)^8@@")
-- testEncXx :: Test
-- testEncXx = TestCase $ do
-- assertEqual "for (encxx \"Hello, World!\")," (BSU.fromString "BG4JgP4wg63RjQalY6E") (encxx helloWorldBS)
-- assertEqual "for (encxx \"Haskell\")," (BSU.fromString "'2&%S:V5L; ") (encxx haskellBS)
-- assertEqual "for (encxx \"😂\")," (BSU.fromString "$\\)^8@@") (encxx emojiBS)
-- testDecXx :: Test
-- testDecXx = TestCase $ do
-- assertEqual "for (decxx \"-2&5L;&\\L(%=O<FQD(0\")," helloWorldBS (decxx $ BSU.fromString "-2&5L;&\\L(%=O<FQD(0")
-- assertEqual "for (decxx \"'2&%S:V5L;\")," haskellBS (decxx $ BSU.fromString "'2&%S:V5L; ")
-- assertEqual "for (decxx \"$\\)^8@@\")," (BSU.fromString "😂") (decxx $ BSU.fromString "$\\)^8@@")
tests :: Test tests :: Test
tests = TestList [TestLabel "Test enc91" testEnc91, tests = TestList [TestLabel "Test enc91" testEnc91,
TestLabel "Test dec91" testDec91, TestLabel "Test dec91" testDec91,
@ -182,7 +194,9 @@ tests = TestList [TestLabel "Test enc91" testEnc91,
TestLabel "Test dec2 " testDec2 , TestLabel "Test dec2 " testDec2 ,
TestLabel "Test enc2 " testEnc2 , TestLabel "Test enc2 " testEnc2 ,
TestLabel "Test decqp" testDecQp, TestLabel "Test decqp" testDecQp,
TestLabel "Test encqp" testEncQp] TestLabel "Test encqp" testEncQp,
TestLabel "Test decuu" testDecUu,
TestLabel "Test encuu" testEncUu]
-- main :: IO Counts -- main :: IO Counts