Skip to content
Snippets Groups Projects
Verified Commit 5ea12048 authored by Julian Ospald's avatar Julian Ospald :tea:
Browse files

Merge branch 'some-more'

parents 1b722387 c0e8d309
Branches
Tags
No related merge requests found
......@@ -3,3 +3,4 @@
- ignore: {name: "Use if"}
- ignore: {name: "Use fmap"}
- ignore: {name: "Use uncurry"}
- ignore: {name: "Use unless"}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE TypeApplications #-}
-- This template expects CPP definitions for:
-- MODULE_NAME = Posix | Windows
......@@ -127,17 +128,24 @@ import Data.List(stripPrefix, isSuffixOf, uncons)
#define FILEPATH FilePath
#else
import Prelude (fromIntegral)
import System.OsPath.Encoding ( encodeWithTE )
import GHC.IO.Encoding.Failure ( CodingFailureMode(..) )
import Control.Exception ( SomeException, evaluate, try, displayException )
import Data.Bifunctor (first)
import Control.DeepSeq (force)
import GHC.IO (unsafePerformIO)
import qualified Data.Char as C
#ifdef WINDOWS
import GHC.IO.Encoding.Failure ( CodingFailureMode(..) )
import GHC.IO.Encoding.UTF16 ( mkUTF16le )
import qualified GHC.Foreign as GHC
import Data.Word ( Word16 )
import System.OsPath.Data.ByteString.Short.Word16
import System.OsPath.Data.ByteString.Short ( packCStringLen )
#define CHAR Word16
#define STRING ShortByteString
#define FILEPATH ShortByteString
#else
import GHC.IO.Encoding.Failure ( CodingFailureMode(..) )
import qualified GHC.Foreign as GHC
import GHC.IO.Encoding.UTF8 ( mkUTF8 )
import Data.Word ( Word8 )
import System.OsPath.Data.ByteString.Short
......@@ -1180,10 +1188,14 @@ snoc str = \c -> str <> [c]
#else
#ifdef WINDOWS
fromString :: P.String -> STRING
fromString = P.either (P.error . P.show) P.id . encodeWithTE (mkUTF16le ErrorOnCodingFailure)
fromString str = P.either (P.error . P.show) P.id $ unsafePerformIO $ do
r <- try @SomeException $ GHC.withCStringLen (mkUTF16le ErrorOnCodingFailure) str $ \cstr -> packCStringLen cstr
evaluate $ force $ first displayException r
#else
fromString :: P.String -> STRING
fromString = P.either (P.error . P.show) P.id . encodeWithTE (mkUTF8 ErrorOnCodingFailure)
fromString str = P.either (P.error . P.show) P.id $ unsafePerformIO $ do
r <- try @SomeException $ GHC.withCStringLen (mkUTF8 ErrorOnCodingFailure) str $ \cstr -> packCStringLen cstr
evaluate $ force $ first displayException r
#endif
_a, _z, _A, _Z, _period, _quotedbl, _backslash, _slash, _question, _U, _N, _C, _colon, _semicolon, _US, _less, _greater, _bar, _asterisk, _nul, _space, _underscore :: CHAR
......
......@@ -30,8 +30,7 @@
-- are often interpreted as UTF8) as per the
-- <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_170 POSIX specification>
-- and are passed as @char[]@ to syscalls. 'OsPath' maintains no invariant
-- here. Some functions however, such as 'toOsPathUtf', may expect
-- or produce UTF8.
-- here.
--
-- Apart from encoding, filepaths have additional restrictions per platform:
--
......@@ -50,14 +49,13 @@
-- It is advised to follow these principles when dealing with filepaths\/filenames:
--
-- 1. Avoid interpreting filenames that the OS returns, unless absolutely necessary.
-- For example, the filepath separator is usually a predefined 'Word8', regardless of encoding.
-- For example, the filepath separator is usually a predefined 'Word8'/'Word16', regardless of encoding.
-- So even if we need to split filepaths, it might still not be necessary to understand the encoding
-- of the filename.
-- 2. When interpreting OS returned filenames consider that these might not be UTF8 on /unix/
-- or at worst don't have an ASCII compatible encoding. Some strategies here involve looking
-- up the current locale and using that for decoding ('fromOsPathFS' does this).
-- Otherwise it can be reasonable to assume UTF8 on unix ('fromOsPathUtf' does that) if your application specifically
-- mentions that it requires a UTF8 compatible system. If you know the encoding, you can just use 'fromOsPathEnc'.
-- or at worst don't have an ASCII compatible encoding. The are 3 available strategies fer decoding/encoding:
-- a) pick the best UTF (UTF-8 on unix, UTF-16LE on windows), b) decode with an explicitly defined 'TextEncoding',
-- c) mimic the behavior of the @base@ library (permissive UTF16 on windows, current filesystem encoding on unix).
-- 3. Avoid comparing @String@ based filepaths, because filenames of different encodings
-- may have the same @String@ representation, although they're not the same byte-wise.
......
{-# LANGUAGE TypeApplications #-}
-- This template expects CPP definitions for:
--
-- WINDOWS defined? = no | yes | no
......@@ -115,7 +116,6 @@ import System.OsString.Windows as PS
, decodeWith
, decodeFS
, pack
, pstr
, encodeUtf
, encodeWith
, encodeFS
......@@ -123,9 +123,23 @@ import System.OsString.Windows as PS
)
import Data.Bifunctor ( bimap )
import qualified System.OsPath.Windows.Internal as C
import GHC.IO.Encoding.UTF16 ( mkUTF16le )
import Language.Haskell.TH.Quote
( QuasiQuoter (..) )
import Language.Haskell.TH.Syntax
( Lift (..), lift )
import GHC.IO.Encoding.Failure ( CodingFailureMode(..) )
import Control.Monad ( when )
#elif defined(POSIX)
import GHC.IO.Encoding.Failure ( CodingFailureMode(..) )
import Control.Monad ( when )
import Language.Haskell.TH.Quote
( QuasiQuoter (..) )
import Language.Haskell.TH.Syntax
( Lift (..), lift )
import GHC.IO.Encoding.UTF8 ( mkUTF8 )
import System.OsPath.Types
import System.OsString.Posix as PS
( unsafeFromChar
......@@ -134,7 +148,6 @@ import System.OsString.Posix as PS
, decodeWith
, decodeFS
, pack
, pstr
, encodeUtf
, encodeWith
, encodeFS
......@@ -158,7 +171,7 @@ import System.OsPath.Internal as PS
)
import System.OsPath.Types
( OsPath )
import System.OsString
import System.OsString ( unsafeFromChar, toChar )
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
import qualified System.OsPath.Windows as C
......
......@@ -21,8 +21,6 @@ module System.OsPath.Encoding
, utf16le_b_encode
-- * base encoding
, encodeWithTE
, decodeWithTE
, encodeWithBasePosix
, decodeWithBasePosix
, encodeWithBaseWindows
......
......@@ -28,7 +28,7 @@ import Data.Bifunctor (first)
import Data.Data (Typeable)
import GHC.Show (Show (show))
import Numeric (showHex)
import Foreign.C (CString, CStringLen)
import Foreign.C (CStringLen)
import Data.Char (chr)
import Foreign
import Prelude (FilePath)
......@@ -260,39 +260,19 @@ charsToCWchars = foldr (utf16Char . ord) []
-- FFI
--
-- | Marshal a Haskell string into a NUL terminated C wide string using
-- temporary storage.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * the memory is freed when the subcomputation terminates (either
-- normally or via an exception), so the pointer to the temporary
-- storage must /not/ be used after this.
--
withCWString :: String -> (Ptr Word16 -> IO a) -> IO a
withCWString = withArray0 wNUL . charsToCWchars
withFilePathWin :: FilePath -> (Int -> Ptr Word16 -> IO a) -> IO a
withFilePathWin = withArrayLen . charsToCWchars
peekCWString :: Ptr Word16 -> IO String
peekCWString cp = do
cs <- peekArray0 wNUL cp
peekFilePathWin :: (Ptr Word16, Int) -> IO FilePath
peekFilePathWin (cp, l) = do
cs <- peekArray l cp
return (cWcharsToChars cs)
withFilePathWin :: FilePath -> (Ptr Word16 -> IO a) -> IO a
withFilePathWin = withCWString
peekFilePathWin :: Ptr Word16 -> IO FilePath
peekFilePathWin = peekCWString
withFilePathPosix :: FilePath -> (CStringLen -> IO a) -> IO a
withFilePathPosix fp f = getFileSystemEncoding >>= \enc -> GHC.withCStringLen enc fp f
withFilePathPosix :: FilePath -> (CString -> IO a) -> IO a
withFilePathPosix fp f = getFileSystemEncoding >>= \enc -> GHC.withCString enc fp f
peekFilePathLenPosix :: CStringLen -> IO FilePath
peekFilePathLenPosix fp = getFileSystemEncoding >>= \enc -> GHC.peekCStringLen enc fp
-- -----------------------------------------------------------------------------
-- Encoders / decoders
--
peekFilePathPosix :: CStringLen -> IO FilePath
peekFilePathPosix fp = getFileSystemEncoding >>= \enc -> GHC.peekCStringLen enc fp
-- | Decode with the given 'TextEncoding'.
decodeWithTE :: TextEncoding -> BS8.ShortByteString -> Either EncodingException String
......@@ -306,21 +286,33 @@ encodeWithTE enc str = unsafePerformIO $ do
r <- try @SomeException $ GHC.withCStringLen enc str $ \cstr -> BS8.packCStringLen cstr
evaluate $ force $ first (flip EncodingError Nothing . displayException) r
-- | This mimics the filepath ddecoder base uses on unix.
-- -----------------------------------------------------------------------------
-- Encoders / decoders
--
-- | This mimics the filepath decoder base uses on unix,
-- with the small distinction that we're not truncating at NUL bytes (because we're not at
-- the outer FFI layer).
decodeWithBasePosix :: BS8.ShortByteString -> IO String
decodeWithBasePosix ba = BS8.useAsCStringLen ba $ \fp -> peekFilePathLenPosix fp
decodeWithBasePosix ba = BS8.useAsCStringLen ba $ \fp -> peekFilePathPosix fp
-- | This mimics the filepath dencoder base uses on unix.
-- | This mimics the filepath dencoder base uses on unix,
-- with the small distinction that we're not truncating at NUL bytes (because we're not at
-- the outer FFI layer).
encodeWithBasePosix :: String -> IO BS8.ShortByteString
encodeWithBasePosix str = withFilePathPosix str $ \cstr -> BS8.packCString cstr
-- | This mimics the filepath decoder base uses on windows.
decodeWithBaseWindows :: BS16.ShortByteString -> String
decodeWithBaseWindows = cWcharsToChars . BS16.unpack
-- | This mimics the filepath dencoder base uses on windows.
encodeWithBaseWindows :: String -> BS8.ShortByteString
encodeWithBaseWindows = BS16.pack . charsToCWchars
encodeWithBasePosix str = withFilePathPosix str $ \cstr -> BS8.packCStringLen cstr
-- | This mimics the filepath decoder base uses on windows,
-- with the small distinction that we're not truncating at NUL bytes (because we're not at
-- the outer FFI layer).
decodeWithBaseWindows :: BS16.ShortByteString -> IO String
decodeWithBaseWindows ba = BS16.useAsCWStringLen ba $ \fp -> peekFilePathWin fp
-- | This mimics the filepath dencoder base uses on windows,
-- with the small distinction that we're not truncating at NUL bytes (because we're not at
-- the outer FFI layer).
encodeWithBaseWindows :: String -> IO BS16.ShortByteString
encodeWithBaseWindows str = withFilePathWin str $ \l cstr -> BS16.packCWStringLen (cstr, l)
-- -----------------------------------------------------------------------------
......
......@@ -7,21 +7,31 @@ module System.OsPath.Internal where
import {-# SOURCE #-} System.OsPath
( isValid )
import System.OsPath.Types
import System.OsString.Internal hiding ( fromBytes )
import qualified System.OsString.Internal as OS
import Control.Monad.Catch
( MonadThrow )
import Data.ByteString
( ByteString )
import Language.Haskell.TH
import Language.Haskell.TH.Quote
( QuasiQuoter (..) )
import Language.Haskell.TH.Syntax
( Lift (..), lift )
import GHC.IO.Encoding.Failure ( CodingFailureMode(..) )
import System.OsString.Internal.Types
import System.OsPath.Encoding
import Control.Monad (when)
import System.IO
( TextEncoding )
import System.OsPath.Encoding ( EncodingException(..) )
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
import qualified System.OsPath.Windows as PF
import GHC.IO.Encoding.UTF16 ( mkUTF16le )
#else
import qualified System.OsPath.Posix as PF
import GHC.IO.Encoding.UTF8 ( mkUTF8 )
#endif
......@@ -98,20 +108,37 @@ fromBytes :: MonadThrow m
fromBytes = OS.fromBytes
mkOsPath :: ByteString -> Q Exp
mkOsPath bs =
case fromBytes bs of
Just afp' ->
if isValid afp'
then lift afp'
else error "invalid filepath"
Nothing -> error "invalid encoding"
-- | QuasiQuote an 'OsPath'. This accepts Unicode characters
-- and encodes as UTF-8 on unix and UTF-16LE on windows. Runs 'isValid'
-- on the input.
osp :: QuasiQuoter
osp :: QuasiQuoter
osp = QuasiQuoter
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
{ quoteExp = \s -> do
osp' <- either (fail . show) (pure . OsString) . PF.encodeWith (mkUTF16le ErrorOnCodingFailure) $ s
when (not $ isValid osp') $ fail ("filepath now valid: " ++ show osp')
lift osp'
, quotePat = \_ ->
fail "illegal QuasiQuote (allowed as expression only, used as a pattern)"
, quoteType = \_ ->
fail "illegal QuasiQuote (allowed as expression only, used as a type)"
, quoteDec = \_ ->
fail "illegal QuasiQuote (allowed as expression only, used as a declaration)"
}
#else
{ quoteExp = \s -> do
osp' <- either (fail . show) (pure . OsString) . PF.encodeWith (mkUTF8 ErrorOnCodingFailure) $ s
when (not $ isValid osp') $ fail ("filepath now valid: " ++ show osp')
lift osp'
, quotePat = \_ ->
fail "illegal QuasiQuote (allowed as expression only, used as a pattern)"
, quoteType = \_ ->
fail "illegal QuasiQuote (allowed as expression only, used as a type)"
, quoteDec = \_ ->
fail "illegal QuasiQuote (allowed as expression only, used as a declaration)"
}
#endif
-- | Unpack an 'OsPath' to a list of 'OsChar'.
......
......@@ -8,3 +8,20 @@
#define WORD_NAME PosixChar
#include "Common.hs"
-- | QuasiQuote a 'PosixPath'. This accepts Unicode characters
-- and encodes as UTF-8. Runs 'isValid' on the input.
pstr :: QuasiQuoter
pstr =
QuasiQuoter
{ quoteExp = \s -> do
ps <- either (fail . show) pure $ encodeWith (mkUTF8 ErrorOnCodingFailure) s
when (not $ isValid ps) $ fail ("filepath now valid: " ++ show ps)
lift ps
, quotePat = \_ ->
fail "illegal QuasiQuote (allowed as expression only, used as a pattern)"
, quoteType = \_ ->
fail "illegal QuasiQuote (allowed as expression only, used as a type)"
, quoteDec = \_ ->
fail "illegal QuasiQuote (allowed as expression only, used as a declaration)"
}
......@@ -8,3 +8,21 @@
#define WORD_NAME WindowsChar
#include "Common.hs"
-- | QuasiQuote a 'WindowsPath'. This accepts Unicode characters
-- and encodes as UTF-16LE. Runs 'isValid' on the input.
pstr :: QuasiQuoter
pstr =
QuasiQuoter
{ quoteExp = \s -> do
ps <- either (fail . show) pure $ encodeWith (mkUTF16le ErrorOnCodingFailure) s
when (not $ isValid ps) $ fail ("filepath now valid: " ++ show ps)
lift ps
, quotePat = \_ ->
fail "illegal QuasiQuote (allowed as expression only, used as a pattern)"
, quoteType = \_ ->
fail "illegal QuasiQuote (allowed as expression only, used as a type)"
, quoteDec = \_ ->
fail "illegal QuasiQuote (allowed as expression only, used as a declaration)"
}
......@@ -66,7 +66,6 @@ import Data.Bifunctor ( first )
import GHC.IO
( evaluate, unsafePerformIO )
import qualified GHC.Foreign as GHC
import Language.Haskell.TH
import Language.Haskell.TH.Quote
( QuasiQuoter (..) )
import Language.Haskell.TH.Syntax
......@@ -79,7 +78,7 @@ import System.OsPath.Encoding
import System.IO
( TextEncoding, utf16le )
import GHC.IO.Encoding.UTF16 ( mkUTF16le )
import qualified System.OsPath.Data.ByteString.Short.Word16 as BS
import qualified System.OsPath.Data.ByteString.Short.Word16 as BS16
import qualified System.OsPath.Data.ByteString.Short as BS8
#else
import System.OsPath.Encoding
......@@ -142,7 +141,7 @@ encodeWith enc str = unsafePerformIO $ do
#endif
encodeFS :: String -> IO PLATFORM_STRING
#ifdef WINDOWS
encodeFS = pure . WindowsString . encodeWithBaseWindows
encodeFS = fmap WindowsString . encodeWithBaseWindows
#else
encodeFS = fmap PosixString . encodeWithBasePosix
#endif
......@@ -193,7 +192,7 @@ decodeWith unixEnc (PosixString ba) = unsafePerformIO $ do
#ifdef WINDOWS_DOC
-- | Like 'fromPlatformStringUtf', except this mimics the behavior of the base library when doing filesystem
-- | Like 'decodeUtf', except this mimics the behavior of the base library when doing filesystem
-- operations, which does permissive UTF-16 encoding, where coding errors generate
-- Chars in the surrogate range.
--
......@@ -210,7 +209,7 @@ decodeWith unixEnc (PosixString ba) = unsafePerformIO $ do
#endif
decodeFS :: PLATFORM_STRING -> IO String
#ifdef WINDOWS
decodeFS (WindowsString ba) = pure $ decodeWithBaseWindows ba
decodeFS (WindowsString ba) = decodeWithBaseWindows ba
#else
decodeFS (PosixString ba) = decodeWithBasePosix ba
#endif
......@@ -233,18 +232,27 @@ fromBytes :: MonadThrow m
-> m PLATFORM_STRING
#ifdef WINDOWS
fromBytes bs =
let ws = WindowsString . BS.toShort $ bs
let ws = WindowsString . BS16.toShort $ bs
in either throwM (const . pure $ ws) $ decodeWith ucs2le ws
#else
fromBytes = pure . PosixString . BS.toShort
#endif
qq :: (ByteString -> Q Exp) -> QuasiQuoter
qq quoteExp' =
#ifdef WINDOWS_DOC
-- | QuasiQuote a 'WindowsString'. This accepts Unicode characters
-- and encodes as UTF-16LE on windows.
#else
-- | QuasiQuote a 'PosixString'. This accepts Unicode characters
-- and encodes as UTF-8 on unix.
#endif
pstr :: QuasiQuoter
pstr =
QuasiQuoter
#ifdef WINDOWS
{ quoteExp = quoteExp' . BS.fromShort . either (error . show) id . encodeWithTE (mkUTF16le ErrorOnCodingFailure)
{ quoteExp = \s -> do
ps <- either (fail . show) pure $ encodeWith (mkUTF16le ErrorOnCodingFailure) s
lift ps
, quotePat = \_ ->
fail "illegal QuasiQuote (allowed as expression only, used as a pattern)"
, quoteType = \_ ->
......@@ -253,7 +261,9 @@ qq quoteExp' =
fail "illegal QuasiQuote (allowed as expression only, used as a declaration)"
}
#else
{ quoteExp = quoteExp' . BS.fromShort . either (error . show) id . encodeWithTE (mkUTF8 ErrorOnCodingFailure)
{ quoteExp = \s -> do
ps <- either (fail . show) pure $ encodeWith (mkUTF8 ErrorOnCodingFailure) s
lift ps
, quotePat = \_ ->
fail "illegal QuasiQuote (allowed as expression only, used as a pattern)"
, quoteType = \_ ->
......@@ -263,27 +273,11 @@ qq quoteExp' =
}
#endif
mkPlatformString :: ByteString -> Q Exp
mkPlatformString bs =
case fromBytes bs of
Just afp -> lift afp
Nothing -> error "invalid encoding"
#ifdef WINDOWS_DOC
-- | QuasiQuote a 'WindowsString'. This accepts Unicode characters
-- and encodes as UTF-16 on windows.
#else
-- | QuasiQuote a 'PosixString'. This accepts Unicode characters
-- and encodes as UTF-8 on unix.
#endif
pstr :: QuasiQuoter
pstr = qq mkPlatformString
-- | Unpack a platform string to a list of platform words.
unpack :: PLATFORM_STRING -> [PLATFORM_WORD]
#ifdef WINDOWS
unpack (WindowsString ba) = WindowsChar <$> BS.unpack ba
unpack (WindowsString ba) = WindowsChar <$> BS16.unpack ba
#else
unpack (PosixString ba) = PosixChar <$> BS.unpack ba
#endif
......@@ -296,7 +290,7 @@ unpack (PosixString ba) = PosixChar <$> BS.unpack ba
-- you want, because it will truncate unicode code points.
pack :: [PLATFORM_WORD] -> PLATFORM_STRING
#ifdef WINDOWS
pack = WindowsString . BS.pack . fmap (\(WindowsChar w) -> w)
pack = WindowsString . BS16.pack . fmap (\(WindowsChar w) -> w)
#else
pack = PosixString . BS.pack . fmap (\(PosixChar w) -> w)
#endif
......
......@@ -10,10 +10,7 @@ import Control.Monad.Catch
( MonadThrow )
import Data.ByteString
( ByteString )
import System.OsPath.Data.ByteString.Short
( fromShort )
import Data.Char
import Language.Haskell.TH
import Language.Haskell.TH.Quote
( QuasiQuoter (..) )
import Language.Haskell.TH.Syntax
......@@ -21,7 +18,7 @@ import Language.Haskell.TH.Syntax
import System.IO
( TextEncoding )
import System.OsPath.Encoding ( encodeWithTE, EncodingException(..) )
import System.OsPath.Encoding ( EncodingException(..) )
import GHC.IO.Encoding.Failure ( CodingFailureMode(..) )
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
import GHC.IO.Encoding.UTF16 ( mkUTF16le )
......@@ -119,11 +116,15 @@ fromBytes :: MonadThrow m
fromBytes = fmap OsString . PF.fromBytes
qq :: (ByteString -> Q Exp) -> QuasiQuoter
qq quoteExp' =
-- | QuasiQuote an 'OsString'. This accepts Unicode characters
-- and encodes as UTF-8 on unix and UTF-16 on windows.
osstr :: QuasiQuoter
osstr =
QuasiQuoter
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
{ quoteExp = quoteExp' . fromShort . either (error . show) id . encodeWithTE (mkUTF16le ErrorOnCodingFailure)
{ quoteExp = \s -> do
osp <- either (fail . show) (pure . OsString) . PF.encodeWith (mkUTF16le ErrorOnCodingFailure) $ s
lift osp
, quotePat = \_ ->
fail "illegal QuasiQuote (allowed as expression only, used as a pattern)"
, quoteType = \_ ->
......@@ -132,7 +133,9 @@ qq quoteExp' =
fail "illegal QuasiQuote (allowed as expression only, used as a declaration)"
}
#else
{ quoteExp = quoteExp' . fromShort . either (error . show) id . encodeWithTE (mkUTF8 ErrorOnCodingFailure)
{ quoteExp = \s -> do
osp <- either (fail . show) (pure . OsString) . PF.encodeWith (mkUTF8 ErrorOnCodingFailure) $ s
lift osp
, quotePat = \_ ->
fail "illegal QuasiQuote (allowed as expression only, used as a pattern)"
, quoteType = \_ ->
......@@ -142,17 +145,6 @@ qq quoteExp' =
}
#endif
mkOsString :: ByteString -> Q Exp
mkOsString bs =
case fromBytes bs of
Just afp -> lift afp
Nothing -> error "invalid encoding"
-- | QuasiQuote an 'OsString'. This accepts Unicode characters
-- and encodes as UTF-8 on unix and UTF-16 on windows.
osstr :: QuasiQuoter
osstr = qq mkOsString
-- | Unpack an 'OsString' to a list of 'OsChar'.
unpack :: OsString -> [OsChar]
......
......@@ -55,7 +55,7 @@ import qualified Language.Haskell.TH.Syntax as TH
-- FFI call, this overhead is generally much preferable to
-- the memory fragmentation of pinned bytearrays
-- | Commonly used windows string as UTF16 bytes.
-- | Commonly used windows string as wide character bytes.
newtype WindowsString = WindowsString { getWindowsString :: BS.ShortByteString }
deriving (Eq, Ord, Semigroup, Monoid, Typeable, Generic, NFData)
......
cabal-version: 2.2
name: filepath
version: 1.4.99.0
version: 1.4.99.4
-- NOTE: Don't forget to update ./changelog.md
license: BSD-3-Clause
......@@ -48,8 +48,8 @@ description:
extra-source-files:
Generate.hs
Makefile
System/OsPath/Common.hs
System/FilePath/Internal.hs
System/OsPath/Common.hs
System/OsString/Common.hs
tests/bytestring-tests/Properties/Common.hs
......@@ -69,6 +69,9 @@ source-repository head
library
exposed-modules:
System.FilePath
System.FilePath.Posix
System.FilePath.Windows
System.OsPath
System.OsPath.Data.ByteString.Short
System.OsPath.Data.ByteString.Short.Internal
......@@ -81,9 +84,6 @@ library
System.OsPath.Types
System.OsPath.Windows
System.OsPath.Windows.Internal
System.FilePath
System.FilePath.Posix
System.FilePath.Windows
System.OsString
System.OsString.Internal
System.OsString.Internal.Types
......@@ -170,9 +170,9 @@ test-suite abstract-filepath
main-is: Test.hs
hs-source-dirs: tests tests/abstract-filepath
other-modules:
OsPathSpec
Arbitrary
EncodingSpec
OsPathSpec
TestUtil
build-depends:
......
......@@ -25,7 +25,7 @@ import qualified System.OsPath.Posix as AFP_P
import System.OsPath.Types
#endif
import System.OsString.Internal.Types
import System.OsPath.Encoding
import System.OsPath.Encoding.Internal
import GHC.IO.Encoding.UTF16 ( mkUTF16le )
import GHC.IO.Encoding.UTF8 ( mkUTF8 )
import GHC.IO.Encoding.Failure
......
......@@ -13,12 +13,11 @@ import Test.QuickCheck
import Data.Either ( isRight )
import qualified System.OsPath.Data.ByteString.Short as BS8
import qualified System.OsPath.Data.ByteString.Short.Word16 as BS16
import qualified GHC.Foreign as GHC
import System.OsPath.Encoding.Internal
import GHC.IO (unsafePerformIO)
import GHC.IO.Encoding ( setFileSystemEncoding )
import System.IO
( TextEncoding, utf16le )
( utf16le )
import Control.Exception
import Control.DeepSeq
import Data.Bifunctor ( first )
......@@ -31,47 +30,47 @@ tests :: [(String, Property)]
tests =
[ ("ucs2le_decode . ucs2le_encode == id",
property $ \(padEven -> ba) ->
let decoded = decode ucs2le (BS8.toShort ba)
encoded = encode ucs2le =<< decoded
let decoded = decodeWithTE ucs2le (BS8.toShort ba)
encoded = encodeWithTE ucs2le =<< decoded
in (BS8.fromShort <$> encoded) === Right ba)
, ("utf16 doesn't handle invalid surrogate pairs",
property $
let str = [toEnum 55296, toEnum 55297]
encoded = encode utf16le str
decoded = decode utf16le =<< encoded
in decoded === Left "recoverEncode: invalid argument (invalid character)")
encoded = encodeWithTE utf16le str
decoded = decodeWithTE utf16le =<< encoded
in decoded === Left (EncodingError "recoverEncode: invalid argument (invalid character)" Nothing))
, ("ucs2 handles invalid surrogate pairs",
property $
let str = [toEnum 55296, toEnum 55297]
encoded = encode ucs2le str
decoded = decode ucs2le =<< encoded
encoded = encodeWithTE ucs2le str
decoded = decodeWithTE ucs2le =<< encoded
in decoded === Right str)
, ("can roundtrip arbitrary bytes through utf-8 (with RoundtripFailure)",
property $
\bs ->
let decoded = decode (mkUTF8 RoundtripFailure) (BS8.toShort bs)
encoded = encode (mkUTF8 RoundtripFailure) =<< decoded
let decoded = decodeWithTE (mkUTF8 RoundtripFailure) (BS8.toShort bs)
encoded = encodeWithTE (mkUTF8 RoundtripFailure) =<< decoded
in (either (const 0) BS8.length encoded, encoded) === (BS8.length (BS8.toShort bs), Right (BS8.toShort bs)))
, ("can decode arbitrary strings through utf-8 (with RoundtripFailure)",
property $
\(NonNullSurrogateString str) ->
let encoded = encode (mkUTF8 RoundtripFailure) str
decoded = decode (mkUTF8 RoundtripFailure) =<< encoded
let encoded = encodeWithTE (mkUTF8 RoundtripFailure) str
decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded
in expectFailure $ (either (const 0) length decoded, decoded) === (length str, Right str))
, ("utf-8 roundtrip encode cannot deal with some surrogates",
property $
let str = [toEnum 0xDFF0, toEnum 0xDFF2]
encoded = encode (mkUTF8 RoundtripFailure) str
decoded = decode (mkUTF8 RoundtripFailure) =<< encoded
in decoded === Left "recoverEncode: invalid argument (invalid character)")
encoded = encodeWithTE (mkUTF8 RoundtripFailure) str
decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded
in decoded === Left (EncodingError "recoverEncode: invalid argument (invalid character)" Nothing))
, ("cannot roundtrip arbitrary bytes through utf-16 (with RoundtripFailure)",
property $
\(padEven -> bs) ->
let decoded = decode (mkUTF16le RoundtripFailure) (BS8.toShort bs)
encoded = encode (mkUTF16le RoundtripFailure) =<< decoded
let decoded = decodeWithTE (mkUTF16le RoundtripFailure) (BS8.toShort bs)
encoded = encodeWithTE (mkUTF16le RoundtripFailure) =<< decoded
in expectFailure $ (either (const 0) BS8.length encoded, encoded) === (BS8.length (BS8.toShort bs), Right (BS8.toShort bs)))
, ("encodeWithTE/decodeWithTE ErrorOnCodingFailure fails (utf16le)",
property $
......@@ -114,17 +113,15 @@ tests =
, ("decodeWithBaseWindows == utf16le_b",
property $
\(BS8.toShort . padEven -> bs) ->
let bs' = BS16.takeWhile (/= wNUL) bs
decoded = decodeW' bs'
decoded' = first displayException $ decodeWithTE (mkUTF16le_b ErrorOnCodingFailure) bs'
let decoded = decodeW' bs
decoded' = first displayException $ decodeWithTE (mkUTF16le_b ErrorOnCodingFailure) bs
in decoded === decoded')
, ("encodeWithBaseWindows == utf16le_b",
property $
\(NonNullSurrogateString str) ->
let str' = takeWhile (/= '\NUL') str
decoded = encodeW' str'
decoded' = first displayException $ encodeWithTE (mkUTF16le_b ErrorOnCodingFailure) str'
let decoded = encodeW' str
decoded' = first displayException $ encodeWithTE (mkUTF16le_b ErrorOnCodingFailure) str
in decoded === decoded')
, ("encodeWithTE/decodeWithTE never fails (utf16le_b)",
......@@ -142,34 +139,23 @@ padEven bs
| otherwise = bs `BS.append` BS.pack [70]
decode :: TextEncoding -> BS8.ShortByteString -> Either String String
decode enc ba = unsafePerformIO $ do
r <- try @SomeException $ BS8.useAsCStringLen ba $ \fp -> GHC.peekCStringLen enc fp
evaluate $ force $ first displayException r
encode :: TextEncoding -> String -> Either String BS8.ShortByteString
encode enc str = unsafePerformIO $ do
r <- try @SomeException $ GHC.withCStringLen enc str $ \cstr -> BS8.packCStringLen cstr
evaluate $ force $ first displayException r
decodeP' :: BS8.ShortByteString -> Either String String
decodeP' ba = unsafePerformIO $ do
r <- try @SomeException $ BS8.useAsCStringLen ba $ \fp -> peekFilePathLenPosix fp
r <- try @SomeException $ decodeWithBasePosix ba
evaluate $ force $ first displayException r
encodeP' :: String -> Either String BS8.ShortByteString
encodeP' str = unsafePerformIO $ do
r <- try @SomeException $ withFilePathPosix str $ \cstr -> BS8.packCString cstr
r <- try @SomeException $ encodeWithBasePosix str
evaluate $ force $ first displayException r
decodeW' :: BS16.ShortByteString -> Either String String
decodeW' ba = unsafePerformIO $ do
r <- try @SomeException $ BS16.useAsCWString ba $ \fp -> peekFilePathWin fp
r <- try @SomeException $ decodeWithBaseWindows ba
evaluate $ force $ first displayException r
encodeW' :: String -> Either String BS8.ShortByteString
encodeW' str = unsafePerformIO $ do
r <- try @SomeException $ withFilePathWin str $ \cstr -> BS16.packCWString cstr
r <- try @SomeException $ encodeWithBaseWindows str
evaluate $ force $ first displayException r
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE QuasiQuotes #-}
module OsPathSpec where
......@@ -13,6 +14,8 @@ import System.OsPath.Windows as Windows
import System.OsPath.Encoding
import qualified System.OsString.Internal.Types as OS
import System.OsPath.Data.ByteString.Short ( toShort )
import System.OsString.Posix as PosixS
import System.OsString.Windows as WindowsS
import Control.Exception
import Data.ByteString ( ByteString )
......@@ -191,6 +194,43 @@ tests =
.&&. r5 === expected
)
)
, ("QuasiQuoter (WindowsString)",
property $ do
let bs = WindowsString $ BS16.pack [0x0041,0x0042,0x0063,0x004b,0x005f,0x0028,0x30c4,0x0031,0x0032,0x0033,0x005f,0x0026,0x002a,0x002a]
let expected = [WindowsS.pstr|ABcK_(123_&**|]
bs === expected
)
, ("QuasiQuoter (PosixString)",
property $ do
let bs = PosixString $ SBS.pack [0x41,0x42,0x63,0x4b,0x5f,0x28,0xe3,0x83,0x84,0x31,0x32,0x33,0x5f,0x26,0x2a,0x2a]
let expected = [PosixS.pstr|ABcK_(123_&**|]
bs === expected
)
, ("QuasiQuoter (WindowsPath)",
property $ do
let bs = WindowsString $ BS16.pack [0x0041,0x0042,0x0063,0x004b,0x005f]
let expected = [Windows.pstr|ABcK_|]
bs === expected
)
, ("QuasiQuoter (PosixPath)",
property $ do
let bs = PosixString $ SBS.pack [0x41,0x42,0x63,0x4b,0x5f]
let expected = [Posix.pstr|ABcK_|]
bs === expected
)
, ("pack . unpack == id (Windows)",
property $ \ws@(WindowsString _) ->
Windows.pack (Windows.unpack ws) === ws
)
, ("pack . unpack == id (Posix)",
property $ \ws@(PosixString _) ->
Posix.pack (Posix.unpack ws) === ws
)
, ("pack . unpack == id (OsPath)",
property $ \ws@(OsString _) ->
OSP.pack (OSP.unpack ws) === ws
)
] ++ testBatch (QC.ord (\(a :: OsPath) -> pure a))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment