Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
Glasgow Haskell Compiler
Packages
text
Commits
c67f3dcc
Commit
c67f3dcc
authored
Jul 18, 2021
by
Bodigrim
Browse files
Experiment with case conversions
parent
1cbc95b0
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
scripts/CaseFolding.hs
View file @
c67f3dcc
...
...
@@ -11,6 +11,7 @@ module CaseFolding
)
where
import
Arsec
import
Data.Bits
data
Fold
=
Fold
{
code
::
Char
...
...
@@ -34,13 +35,19 @@ parseCF :: FilePath -> IO (Either ParseError CaseFolding)
parseCF
name
=
parse
entries
name
<$>
readFile
name
mapCF
::
CaseFolding
->
[
String
]
mapCF
(
CF
_
ms
)
=
typ
++
(
map
nice
.
filter
p
$
ms
)
++
[
last
]
mapCF
(
CF
_
ms
)
=
typ
++
map
printUnusual
ms'
++
map
printUsual
usual
++
[
last
]
where
typ
=
[
"foldMapping :: forall s. Char -> s -> Step (CC s) Char"
,
"{-# NOINLINE foldMapping #-}"
]
last
=
"foldMapping c s = Yield (toLower c) (CC s '
\\
0' '
\\
0')"
nice
c
=
"-- "
++
name
c
++
"
\n
"
++
"foldMapping "
++
showC
(
code
c
)
++
" s = Yield "
++
x
++
" (CC s "
++
y
++
" "
++
z
++
")"
where
[
x
,
y
,
z
]
=
(
map
showC
.
take
3
)
(
mapping
c
++
repeat
'
\0
'
)
ms'
=
filter
p
ms
p
f
=
status
f
`
elem
`
"CF"
&&
mapping
f
/=
[
toLower
(
code
f
)]
unusual
=
map
code
ms'
usual
=
filter
(
\
c
->
toLower
c
/=
c
&&
c
`
notElem
`
unusual
)
[
minBound
..
maxBound
]
typ
=
[
"foldMapping :: Char# -> _ {- unboxed Int64 -}"
,
"{-# NOINLINE foldMapping #-}"
,
"foldMapping =
\\
case"
]
last
=
" _ -> unI64 0"
printUnusual
c
=
" -- "
++
name
c
++
"
\n
"
++
" "
++
showC
(
code
c
)
++
"# -> unI64 "
++
show
(
ord
x
+
(
ord
y
`
shiftL
`
21
)
+
(
ord
z
`
shiftL
`
42
))
where
x
:
y
:
z
:
_
=
mapping
c
++
repeat
'
\0
'
printUsual
c
=
" "
++
showC
c
++
"# -> unI64 "
++
show
(
ord
(
toLower
c
))
scripts/CaseMapping.hs
View file @
c67f3dcc
...
...
@@ -22,14 +22,17 @@ main = do
let
comments
=
map
(
"--"
++
)
$
take
2
(
cfComments
cfs
)
++
take
2
(
scComments
scs
)
mapM_
(
hPutStrLn
h
)
$
[
"{-# LANGUAGE Rank2Types #-}"
,
"-- AUTOMATICALLY GENERATED - DO NOT EDIT"
[
"-- AUTOMATICALLY GENERATED - DO NOT EDIT"
,
"-- Generated by scripts/CaseMapping.hs"
]
++
comments
++
[
""
,
"{-# LANGUAGE LambdaCase, MagicHash, PartialTypeSignatures #-}"
,
"{-# OPTIONS_GHC -Wno-partial-type-signatures #-}"
,
"module Data.Text.Internal.Fusion.CaseMapping where"
,
"import Data.Char"
,
"import Data.Text.Internal.Fusion.Types"
,
"import GHC.Int"
,
"import GHC.Exts"
,
"unI64 :: Int64 -> _ {- unboxed Int64 -}"
,
"unI64 (I64# n) = n"
,
""
]
mapM_
(
hPutStrLn
h
)
(
mapSC
"upper"
upper
toUpper
scs
)
mapM_
(
hPutStrLn
h
)
(
mapSC
"lower"
lower
toLower
scs
)
...
...
scripts/SpecialCasing.hs
View file @
c67f3dcc
...
...
@@ -11,6 +11,7 @@ module SpecialCasing
)
where
import
Arsec
import
Data.Bits
data
SpecialCasing
=
SC
{
scComments
::
[
Comment
],
scCasing
::
[
Case
]
}
deriving
(
Show
)
...
...
@@ -40,17 +41,23 @@ parseSC name = parse entries name <$> readFile name
mapSC
::
String
->
(
Case
->
String
)
->
(
Char
->
Char
)
->
SpecialCasing
->
[
String
]
mapSC
which
access
twiddle
(
SC
_
ms
)
=
typ
++
(
map
nice
.
filter
p
$
ms
)
++
[
last
]
typ
++
map
printUnusual
ms'
++
map
printUsual
usual
++
[
last
]
where
typ
=
[
which
++
"Mapping :: forall s. Char -> s -> Step (CC s) Char"
,
"{-# NOINLINE "
++
which
++
"Mapping #-}"
]
last
=
which
++
"Mapping c s = Yield (to"
++
ucFirst
which
++
" c) (CC s '
\\
0' '
\\
0')"
nice
c
=
"-- "
++
name
c
++
"
\n
"
++
which
++
"Mapping "
++
showC
(
code
c
)
++
" s = Yield "
++
x
++
" (CC s "
++
y
++
" "
++
z
++
")"
where
[
x
,
y
,
z
]
=
(
map
showC
.
take
3
)
(
access
c
++
repeat
'
\0
'
)
ms'
=
filter
p
ms
p
c
=
[
k
]
/=
a
&&
a
/=
[
twiddle
k
]
&&
null
(
conditions
c
)
where
a
=
access
c
k
=
code
c
unusual
=
map
code
ms'
usual
=
filter
(
\
c
->
twiddle
c
/=
c
&&
c
`
notElem
`
unusual
)
[
minBound
..
maxBound
]
typ
=
[
which
++
"Mapping :: Char# -> _ {- unboxed Int64 -}"
,
"{-# NOINLINE "
++
which
++
"Mapping #-}"
,
which
++
"Mapping =
\\
case"
]
last
=
" _ -> unI64 0"
printUnusual
c
=
" -- "
++
name
c
++
"
\n
"
++
" "
++
showC
(
code
c
)
++
"# -> unI64 "
++
show
(
ord
x
+
(
ord
y
`
shiftL
`
21
)
+
(
ord
z
`
shiftL
`
42
))
where
x
:
y
:
z
:
_
=
access
c
++
repeat
'
\0
'
printUsual
c
=
" "
++
showC
c
++
"# -> unI64 "
++
show
(
ord
(
twiddle
c
))
ucFirst
(
c
:
cs
)
=
toUpper
c
:
cs
ucFirst
[]
=
[]
src/Data/Text/Internal/Fusion/CaseMapping.hs
View file @
c67f3dcc
This diff is collapsed.
Click to expand it.
src/Data/Text/Internal/Fusion/Common.hs
View file @
c67f3dcc
{-# LANGUAGE BangPatterns, MagicHash, Rank2Types #-}
{-# LANGUAGE BangPatterns, MagicHash, Rank2Types, PartialTypeSignatures #-}
{-# OPTIONS_GHC -Wno-partial-type-signatures #-}
-- |
-- Module : Data.Text.Internal.Fusion.Common
-- Copyright : (c) Bryan O'Sullivan 2009, 2012
...
...
@@ -124,13 +125,15 @@ import Prelude (Bool(..), Char, Eq(..), Int, Integral, Maybe(..),
(
&&
),
fromIntegral
,
otherwise
)
import
qualified
Data.List
as
L
import
qualified
Prelude
as
P
import
Data.Bits
(
shiftL
,
shiftR
,
(
.&.
))
import
Data.Char
(
isLetter
,
isSpace
)
import
Data
.Int
(
Int64
)
import
GHC
.Int
(
Int64
(
..
)
)
import
Data.Text.Internal.Encoding.Utf8
(
chr2
,
chr3
,
chr4
,
utf8LengthByLeader
)
import
Data.Text.Internal.Fusion.Types
import
Data.Text.Internal.Fusion.CaseMapping
(
foldMapping
,
lowerMapping
,
titleMapping
,
upperMapping
)
import
Data.Text.Internal.Fusion.Size
import
GHC.Exts
(
Char
(
..
),
Char
#
,
chr
#
)
import
GHC.Prim
(
Addr
#
,
indexWord8OffAddr
#
)
import
GHC.Types
(
Int
(
..
))
import
Data.Text.Internal.Unsafe.Char
(
unsafeChr8
)
...
...
@@ -478,17 +481,27 @@ intersperse c (Stream next0 s0 len) = Stream next (I1 s0) (len + unknownSize)
-- characters.
-- | Map a 'Stream' through the given case-mapping function.
caseConvert
::
(
forall
s
.
Char
->
s
->
Step
(
CC
s
)
Char
)
caseConvert
::
(
Char
#
->
_
{- unboxed Int64 -}
)
->
Stream
Char
->
Stream
Char
caseConvert
remap
(
Stream
next0
s0
len
)
=
Stream
next
(
CC
s0
'
\0
'
'
\0
'
)
(
len
`
unionSize
`
(
3
*
len
))
Stream
next
(
CC
s0
0
)
(
len
`
unionSize
`
(
3
*
len
))
where
next
(
CC
s
'
\0
'
_
)
=
next
(
CC
s
0
)
=
case
next0
s
of
Done
->
Done
Skip
s'
->
Skip
(
CC
s'
'
\0
'
'
\0
'
)
Yield
c
s'
->
remap
c
s'
next
(
CC
s
a
b
)
=
Yield
a
(
CC
s
b
'
\0
'
)
Skip
s'
->
Skip
(
CC
s'
0
)
Yield
c
@
(
C
#
c
#
)
s'
->
case
I64
#
(
remap
c
#
)
of
0
->
Yield
c
(
CC
s'
0
)
ab
->
let
(
a
,
b
)
=
chopOffChar
ab
in
Yield
a
(
CC
s'
b
)
next
(
CC
s
ab
)
=
let
(
a
,
b
)
=
chopOffChar
ab
in
Yield
a
(
CC
s
b
)
chopOffChar
::
Int64
->
(
Char
,
Int64
)
chopOffChar
ab
=
(
chr
a
,
ab
`
shiftR
`
21
)
where
chr
(
I
#
n
)
=
C
#
(
chr
#
n
)
mask
=
(
1
`
shiftL
`
21
)
-
1
a
=
fromIntegral
$
ab
.&.
mask
-- | /O(n)/ Convert a string to folded case. This function is mainly
-- useful for performing caseless (or case insensitive) string
...
...
@@ -556,20 +569,25 @@ toLower = caseConvert lowerMapping
--
-- @ 'Data.Text.Internal.unstream' . 'toTitle' . 'Data.Text.Internal.Fusion.stream' = 'Data.Text.toTitle' @
toTitle
::
Stream
Char
->
Stream
Char
toTitle
(
Stream
next0
s0
len
)
=
Stream
next
(
CC
(
False
:*:
s0
)
'
\0
'
'
\0
'
)
(
len
+
unknownSize
)
toTitle
(
Stream
next0
s0
len
)
=
Stream
next
(
CC
(
False
:*:
s0
)
0
)
(
len
+
unknownSize
)
where
next
(
CC
(
letter
:*:
s
)
'
\0
'
_
)
=
next
(
CC
(
letter
:*:
s
)
0
)
=
case
next0
s
of
Done
->
Done
Skip
s'
->
Skip
(
CC
(
letter
:*:
s'
)
'
\0
'
'
\0
'
)
Yield
c
s'
|
nonSpace
->
if
letter
then
lowerMapping
c
(
nonSpace
:*:
s'
)
else
titleMapping
c
(
letter'
:*:
s'
)
|
otherwise
->
Yield
c
(
CC
(
letter'
:*:
s'
)
'
\0
'
'
\0
'
)
Skip
s'
->
Skip
(
CC
(
letter
:*:
s'
)
0
)
Yield
c
@
(
C
#
c
#
)
s'
|
nonSpace
,
letter
->
case
I64
#
(
lowerMapping
c
#
)
of
0
->
Yield
c
(
CC
(
nonSpace
:*:
s'
)
0
)
ab
->
let
(
a
,
b
)
=
chopOffChar
ab
in
Yield
a
(
CC
(
nonSpace
:*:
s'
)
b
)
|
nonSpace
->
case
I64
#
(
titleMapping
c
#
)
of
0
->
Yield
c
(
CC
(
letter'
:*:
s'
)
0
)
ab
->
let
(
a
,
b
)
=
chopOffChar
ab
in
Yield
a
(
CC
(
letter'
:*:
s'
)
b
)
|
otherwise
->
Yield
c
(
CC
(
letter'
:*:
s'
)
0
)
where
nonSpace
=
P
.
not
(
isSpace
c
)
letter'
=
isLetter
c
next
(
CC
s
a
b
)
=
Yield
a
(
CC
s
b
'
\0
'
)
next
(
CC
s
ab
)
=
let
(
a
,
b
)
=
chopOffChar
ab
in
Yield
a
(
CC
s
b
)
{-# INLINE [0] toTitle #-}
data
Justify
i
s
=
Just1
!
i
!
s
...
...
src/Data/Text/Internal/Fusion/Types.hs
View file @
c67f3dcc
...
...
@@ -29,10 +29,11 @@ module Data.Text.Internal.Fusion.Types
)
where
import
Data.Text.Internal.Fusion.Size
import
Data.Int
(
Int64
)
import
Data.Word
(
Word8
)
-- | Specialised tuple for case conversion.
data
CC
s
=
CC
!
s
{-# UNPACK #-}
!
Char
{-# UNPACK #-}
!
Char
data
CC
s
=
CC
!
s
{-# UNPACK #-}
!
Int64
-- | Restreaming state.
data
RS
s
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment