Commit 9d7da331 authored by simonmar's avatar simonmar

[project @ 2006-01-06 16:30:17 by simonmar]

Add support for UTF-8 source files

GHC finally has support for full Unicode in source files.  Source
files are now assumed to be UTF-8 encoded, and the full range of
Unicode characters can be used, with classifications recognised using
the implementation from Data.Char.  This incedentally means that only
the stage2 compiler will recognise Unicode in source files, because I
was too lazy to port the unicode classifier code into libcompat.

Additionally, the following synonyms for keywords are now recognised:

  forall symbol 	(U+2200)	forall
  right arrow   	(U+2192)	->
  left arrow   		(U+2190)	<-
  horizontal ellipsis 	(U+22EF)	..

there are probably more things we could add here.

This will break some source files if Latin-1 characters are being used.
In most cases this should result in a UTF-8 decoding error.  Later on
if we want to support more encodings (perhaps with a pragma to specify
the encoding), I plan to do it by recoding into UTF-8 before parsing.

Internally, there were some pretty big changes:

  - FastStrings are now stored in UTF-8

  - Z-encoding has been moved right to the back end.  Previously we
    used to Z-encode every identifier on the way in for simplicity,
    and only decode when we needed to show something to the user.
    Instead, we now keep every string in its UTF-8 encoding, and
    Z-encode right before printing it out.  To avoid Z-encoding the
    same string multiple times, the Z-encoding is cached inside the
    FastString the first time it is requested.

    This speeds up the compiler - I've measured some definite
    improvement in parsing at least, and I expect compilations overall
    to be faster too.  It also cleans up a lot of cruft from the
    OccName interface.  Z-encoding is nicely hidden inside the
    Outputable instance for Names & OccNames now.

  - StringBuffers are UTF-8 too, and are now represented as
    ForeignPtrs.

  - I've put together some test cases, not by any means exhaustive,
    but there are some interesting UTF-8 decoding error cases that
    aren't obvious.  Also, take a look at unicode001.hs for a demo.
parent 2a2efb72
......@@ -90,11 +90,19 @@ name = Util.global (value) :: IORef (ty); \
-- when compiling FastString itself
#ifndef COMPILING_FAST_STRING
--
import qualified FastString
import qualified FastString as FS
#endif
#define SLIT(x) (FastString.mkLitString# (x#))
#define FSLIT(x) (FastString.mkFastString# (x#))
#define SLIT(x) (FS.mkLitString# (x#))
#define FSLIT(x) (FS.mkFastString# (x#))
-- Useful for declaring arguments to be strict
#define STRICT1(f) f a b c | a `seq` False = undefined
#define STRICT2(f) f a b | a `seq` b `seq` False = undefined
#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
#define STRICT6(f) f a b c d e f | a `seq` b `seq` c `seq` d `seq` e `seq` f `seq` False = undefined
#endif /* HsVersions.h */
......@@ -408,7 +408,7 @@ SRC_HC_OPTS += -DGHCI -package template-haskell
PKG_DEPENDS += template-haskell
# Use threaded RTS with GHCi, so threads don't get blocked at the prompt.
SRC_HC_OPTS += -threaded
# SRC_HC_OPTS += -threaded
ALL_DIRS += ghci
......
......@@ -9,7 +9,7 @@ module Id (
-- Simple construction
mkGlobalId, mkLocalId, mkLocalIdWithInfo,
mkSysLocal, mkSysLocalUnencoded, mkUserLocal, mkVanillaGlobal,
mkSysLocal, mkUserLocal, mkVanillaGlobal,
mkTemplateLocals, mkTemplateLocalsNum, mkWildId, mkTemplateLocal,
mkWorkerId, mkExportedLocalId,
......@@ -105,15 +105,15 @@ import qualified Demand ( Demand )
import DataCon ( DataCon, isUnboxedTupleCon )
import NewDemand ( Demand, StrictSig, topDmd, topSig, isBottomingSig )
import Name ( Name, OccName, nameIsLocalOrFrom,
mkSystemVarName, mkSystemVarNameEncoded, mkInternalName,
getOccName, getSrcLoc
)
mkSystemVarName, mkInternalName, getOccName,
getSrcLoc )
import Module ( Module )
import OccName ( EncodedFS, mkWorkerOcc )
import OccName ( mkWorkerOcc )
import Maybes ( orElse )
import SrcLoc ( SrcLoc )
import Outputable
import Unique ( Unique, mkBuiltinUnique )
import FastString ( FastString )
import StaticFlags ( opt_NoStateHack )
-- infixl so you can say (id `set` a `set` b)
......@@ -162,15 +162,10 @@ mkLocalId name ty = mkLocalIdWithInfo name ty vanillaIdInfo
-- SysLocal: for an Id being created by the compiler out of thin air...
-- UserLocal: an Id with a name the user might recognize...
mkUserLocal :: OccName -> Unique -> Type -> SrcLoc -> Id
mkSysLocal :: EncodedFS -> Unique -> Type -> Id
mkSysLocal :: FastString -> Unique -> Type -> Id
mkVanillaGlobal :: Name -> Type -> IdInfo -> Id
-- for SysLocal, we assume the base name is already encoded, to avoid
-- re-encoding the same string over and over again.
mkSysLocal fs uniq ty = mkLocalId (mkSystemVarNameEncoded uniq fs) ty
-- version to use when the faststring needs to be encoded
mkSysLocalUnencoded fs uniq ty = mkLocalId (mkSystemVarName uniq fs) ty
mkSysLocal fs uniq ty = mkLocalId (mkSystemVarName uniq fs) ty
mkUserLocal occ uniq ty loc = mkLocalId (mkInternalName uniq occ loc) ty
mkVanillaGlobal = mkGlobalId VanillaGlobal
......
......@@ -35,7 +35,6 @@ import FastTypes
import FastString
import Binary
import UnicodeUtil ( stringToUtf8 )
import Ratio ( numerator )
import FastString ( uniqueOfFS, lengthFS )
import DATA_INT ( Int8, Int16, Int32 )
......@@ -95,7 +94,11 @@ data Literal
= ------------------
-- First the primitive guys
MachChar Char -- Char# At least 31 bits
| MachStr FastString
| MachStr FastString -- A string-literal: stored and emitted
-- UTF-8 encoded, we'll arrange to decode it
-- at runtime. Also emitted with a '\0'
-- terminator.
| MachNullAddr -- the NULL pointer, the only pointer value
-- that can be represented as a Literal.
......@@ -206,7 +209,7 @@ mkMachInt64 x = MachInt64 x
mkMachWord64 x = MachWord64 x
mkStringLit :: String -> Literal
mkStringLit s = MachStr (mkFastString (stringToUtf8 s))
mkStringLit s = MachStr (mkFastString s) -- stored UTF-8 encoded
inIntRange, inWordRange :: Integer -> Bool
inIntRange x = x >= tARGET_MIN_INT && x <= tARGET_MAX_INT
......
......@@ -59,7 +59,7 @@ import Class ( Class, classTyCon, classSelIds )
import Var ( Id, TyVar, Var )
import VarSet ( isEmptyVarSet, subVarSet, varSetElems )
import Name ( mkFCallName, mkWiredInName, Name, BuiltInSyntax(..) )
import OccName ( mkOccFS, varName )
import OccName ( mkOccNameFS, varName )
import PrimOp ( PrimOp, primOpSig, primOpOcc, primOpTag )
import ForeignCall ( ForeignCall )
import DataCon ( DataCon, DataConIds(..), dataConTyVars,
......@@ -847,7 +847,7 @@ another gun with which to shoot yourself in the foot.
\begin{code}
mkWiredInIdName mod fs uniq id
= mkWiredInName mod (mkOccFS varName fs) uniq Nothing (AnId id) UserSyntax
= mkWiredInName mod (mkOccNameFS varName fs) uniq Nothing (AnId id) UserSyntax
unsafeCoerceName = mkWiredInIdName gHC_PRIM FSLIT("unsafeCoerce#") unsafeCoerceIdKey unsafeCoerceId
nullAddrName = mkWiredInIdName gHC_PRIM FSLIT("nullAddr#") nullAddrIdKey nullAddrId
......
......@@ -4,7 +4,7 @@
Module
~~~~~~~~~~
Simply the name of a module, represented as a Z-encoded FastString.
Simply the name of a module, represented as a FastString.
These are Uniquable, hence we can build FiniteMaps with ModuleNames as
the keys.
......@@ -17,13 +17,11 @@ module Module
, ModLocation(..)
, addBootSuffix, addBootSuffix_maybe, addBootSuffixLocn
, moduleString -- :: ModuleName -> EncodedString
, moduleUserString -- :: ModuleName -> UserString
, moduleFS -- :: ModuleName -> EncodedFS
, moduleString -- :: ModuleName -> String
, moduleFS -- :: ModuleName -> FastString
, mkModule -- :: UserString -> ModuleName
, mkModuleFS -- :: UserFS -> ModuleName
, mkSysModuleFS -- :: EncodedFS -> ModuleName
, mkModule -- :: String -> ModuleName
, mkModuleFS -- :: FastString -> ModuleName
, ModuleEnv
, elemModuleEnv, extendModuleEnv, extendModuleEnvList, plusModuleEnv_C
......@@ -108,7 +106,7 @@ addBootSuffixLocn locn
%************************************************************************
\begin{code}
newtype Module = Module EncodedFS
newtype Module = Module FastString
-- Haskell module names can include the quote character ',
-- so the module names have the z-encoding applied to them
......@@ -131,30 +129,26 @@ instance Ord Module where
instance Outputable Module where
ppr = pprModule
pprModule :: Module -> SDoc
pprModule (Module nm) = pprEncodedFS nm
pprModule (Module nm) =
getPprStyle $ \ sty ->
if codeStyle sty
then ftext (zEncodeFS nm)
else ftext nm
moduleFS :: Module -> EncodedFS
moduleFS :: Module -> FastString
moduleFS (Module mod) = mod
moduleString :: Module -> EncodedString
moduleString :: Module -> String
moduleString (Module mod) = unpackFS mod
moduleUserString :: Module -> UserString
moduleUserString (Module mod) = decode (unpackFS mod)
-- used to be called mkSrcModule
mkModule :: UserString -> Module
mkModule s = Module (mkFastString (encode s))
mkModule :: String -> Module
mkModule s = Module (mkFastString s)
-- used to be called mkSrcModuleFS
mkModuleFS :: UserFS -> Module
mkModuleFS s = Module (encodeFS s)
-- used to be called mkSysModuleFS
mkSysModuleFS :: EncodedFS -> Module
mkSysModuleFS s = Module s
mkModuleFS :: FastString -> Module
mkModuleFS s = Module s
\end{code}
%************************************************************************
......
......@@ -12,7 +12,7 @@ module Name (
Name, -- Abstract
BuiltInSyntax(..),
mkInternalName, mkSystemName,
mkSystemVarName, mkSystemVarNameEncoded, mkSysTvName,
mkSystemVarName, mkSysTvName,
mkFCallName, mkIPName,
mkExternalName, mkWiredInName,
......@@ -38,10 +38,11 @@ module Name (
import {-# SOURCE #-} TypeRep( TyThing )
import OccName -- All of it
import Module ( Module )
import Module ( Module, moduleFS )
import SrcLoc ( noSrcLoc, wiredInSrcLoc, SrcLoc )
import Unique ( Unique, Uniquable(..), getKey, pprUnique )
import Maybes ( orElse, isJust )
import FastString ( FastString, zEncodeFS )
import Outputable
\end{code}
......@@ -215,21 +216,16 @@ mkSystemName :: Unique -> OccName -> Name
mkSystemName uniq occ = Name { n_uniq = uniq, n_sort = System,
n_occ = occ, n_loc = noSrcLoc }
mkSystemVarName :: Unique -> UserFS -> Name
mkSystemVarName uniq fs = mkSystemName uniq (mkVarOcc fs)
mkSystemVarName :: Unique -> FastString -> Name
mkSystemVarName uniq fs = mkSystemName uniq (mkVarOccFS fs)
-- Use this version when the string is already encoded. Avoids duplicating
-- the string each time a new name is created.
mkSystemVarNameEncoded :: Unique -> EncodedFS -> Name
mkSystemVarNameEncoded uniq fs = mkSystemName uniq (mkSysOccFS varName fs)
mkSysTvName :: Unique -> FastString -> Name
mkSysTvName uniq fs = mkSystemName uniq (mkOccNameFS tvName fs)
mkSysTvName :: Unique -> EncodedFS -> Name
mkSysTvName uniq fs = mkSystemName uniq (mkSysOccFS tvName fs)
mkFCallName :: Unique -> EncodedString -> Name
mkFCallName :: Unique -> String -> Name
-- The encoded string completely describes the ccall
mkFCallName uniq str = Name { n_uniq = uniq, n_sort = Internal,
n_occ = mkFCallOcc str, n_loc = noSrcLoc }
n_occ = mkVarOcc str, n_loc = noSrcLoc }
mkIPName :: Unique -> OccName -> Name
mkIPName uniq occ
......@@ -317,13 +313,13 @@ pprName (Name {n_sort = sort, n_uniq = uniq, n_occ = occ})
Internal -> pprInternal sty uniq occ
pprExternal sty uniq mod occ is_wired is_builtin
| codeStyle sty = ppr mod <> char '_' <> ppr_occ_name occ
| codeStyle sty = ppr_z_module mod <> char '_' <> ppr_z_occ_name occ
-- In code style, always qualify
-- ToDo: maybe we could print all wired-in things unqualified
-- in code style, to reduce symbol table bloat?
| debugStyle sty = ppr mod <> dot <> ppr_occ_name occ
<> braces (hsep [if is_wired then ptext SLIT("(w)") else empty,
text (briefOccNameFlavour occ),
pprNameSpaceBrief (occNameSpace occ),
pprUnique uniq])
| BuiltInSyntax <- is_builtin = ppr_occ_name occ
-- never qualify builtin syntax
......@@ -332,7 +328,7 @@ pprExternal sty uniq mod occ is_wired is_builtin
pprInternal sty uniq occ
| codeStyle sty = pprUnique uniq
| debugStyle sty = ppr_occ_name occ <> braces (hsep [text (briefOccNameFlavour occ),
| debugStyle sty = ppr_occ_name occ <> braces (hsep [pprNameSpaceBrief (occNameSpace occ),
pprUnique uniq])
| dumpStyle sty = ppr_occ_name occ <> char '_' <> pprUnique uniq
-- For debug dumps, we're not necessarily dumping
......@@ -343,15 +339,21 @@ pprInternal sty uniq occ
pprSystem sty uniq occ
| codeStyle sty = pprUnique uniq
| debugStyle sty = ppr_occ_name occ <> char '_' <> pprUnique uniq
<> braces (text (briefOccNameFlavour occ))
<> braces (pprNameSpaceBrief (occNameSpace occ))
| otherwise = ppr_occ_name occ <> char '_' <> pprUnique uniq
-- If the tidy phase hasn't run, the OccName
-- is unlikely to be informative (like 's'),
-- so print the unique
ppr_occ_name occ = pprEncodedFS (occNameFS occ)
ppr_occ_name occ = ftext (occNameFS occ)
-- Don't use pprOccName; instead, just print the string of the OccName;
-- we print the namespace in the debug stuff above
-- In code style, we Z-encode the strings. The results of Z-encoding each FastString are
-- cached behind the scenes in the FastString implementation.
ppr_z_occ_name occ = ftext (zEncodeFS (occNameFS occ))
ppr_z_module mod = ftext (zEncodeFS (moduleFS mod))
\end{code}
%************************************************************************
......
This diff is collapsed.
......@@ -40,25 +40,17 @@ module RdrName (
#include "HsVersions.h"
import OccName ( NameSpace, varName,
OccName, UserFS,
setOccNameSpace,
mkOccFS, occNameFlavour,
isDataOcc, isTvOcc, isTcOcc,
OccEnv, emptyOccEnv, extendOccEnvList, lookupOccEnv,
elemOccEnv, plusOccEnv_C, extendOccEnv_C, foldOccEnv,
occEnvElts
)
import OccName
import Module ( Module, mkModuleFS )
import Name ( Name, NamedThing(getName), nameModule, nameParent_maybe,
nameOccName, isExternalName, nameSrcLoc )
import Maybes ( mapCatMaybes )
import SrcLoc ( isGoodSrcLoc, SrcSpan )
import FastString ( FastString )
import Outputable
import Util ( thenCmp )
\end{code}
%************************************************************************
%* *
\subsection{The main data type}
......@@ -147,14 +139,14 @@ mkDerivedRdrName parent mk_occ
---------------
-- These two are used when parsing source files
-- They do encode the module and occurrence names
mkUnqual :: NameSpace -> UserFS -> RdrName
mkUnqual sp n = Unqual (mkOccFS sp n)
mkUnqual :: NameSpace -> FastString -> RdrName
mkUnqual sp n = Unqual (mkOccNameFS sp n)
mkVarUnqual :: UserFS -> RdrName
mkVarUnqual n = Unqual (mkOccFS varName n)
mkVarUnqual :: FastString -> RdrName
mkVarUnqual n = Unqual (mkVarOccFS n)
mkQual :: NameSpace -> (UserFS, UserFS) -> RdrName
mkQual sp (m, n) = Qual (mkModuleFS m) (mkOccFS sp n)
mkQual :: NameSpace -> (FastString, FastString) -> RdrName
mkQual sp (m, n) = Qual (mkModuleFS m) (mkOccNameFS sp n)
getRdrName :: NamedThing thing => thing -> RdrName
getRdrName name = nameRdrName (getName name)
......@@ -213,7 +205,7 @@ instance Outputable RdrName where
ppr (Qual mod occ) = ppr mod <> dot <> ppr occ <+> ppr_name_space occ
ppr (Orig mod occ) = ppr mod <> dot <> ppr occ <+> ppr_name_space occ
ppr_name_space occ = ifPprDebug (parens (occNameFlavour occ))
ppr_name_space occ = ifPprDebug (parens (pprNonVarNameSpace (occNameSpace occ)))
instance OutputableBndr RdrName where
pprBndr _ n
......
......@@ -740,10 +740,10 @@ pprCLbl (CC_Label cc) = ppr cc
pprCLbl (CCS_Label ccs) = ppr ccs
pprCLbl (ModuleInitLabel mod way _)
= ptext SLIT("__stginit_") <> ftext (moduleFS mod)
= ptext SLIT("__stginit_") <> ppr mod
<> char '_' <> text way
pprCLbl (PlainModuleInitLabel mod _)
= ptext SLIT("__stginit_") <> ftext (moduleFS mod)
= ptext SLIT("__stginit_") <> ppr mod
ppIdFlavor :: IdLabelInfo -> SDoc
ppIdFlavor x = pp_cSEP <>
......
......@@ -30,6 +30,7 @@ import CLabel ( CLabel )
import ForeignCall ( CCallConv )
import Unique ( Unique, Uniquable(..) )
import FastString ( FastString )
import DATA_WORD ( Word8 )
-----------------------------------------------------------------------------
-- Cmm, CmmTop, CmmBasicBlock
......@@ -251,9 +252,8 @@ data CmmStatic
-- align to next N-byte boundary (N must be a power of 2).
| CmmDataLabel CLabel
-- label the current position in this section.
| CmmString String
| CmmString [Word8]
-- string of 8-bit values only, not zero terminated.
-- ToDo: might be more honest to use [Word8] here?
-----------------------------------------------------------------------------
-- Global STG registers
......
......@@ -227,10 +227,10 @@ tok_decimal span buf len
= return (L span (CmmT_Int $! parseInteger buf len 10 octDecDigit))
tok_octal span buf len
= return (L span (CmmT_Int $! parseInteger (stepOn buf) (len-1) 8 octDecDigit))
= return (L span (CmmT_Int $! parseInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit))
tok_hexadecimal span buf len
= return (L span (CmmT_Int $! parseInteger (stepOnBy 2 buf) (len-2) 16 hexDigit))
= return (L span (CmmT_Int $! parseInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))
tok_float str = CmmT_Float $! readRational str
......
......@@ -48,6 +48,7 @@ import Constants ( wORD_SIZE )
import Outputable
import Monad ( when )
import Data.Char ( ord )
#include "HsVersions.h"
}
......@@ -177,7 +178,7 @@ static :: { ExtFCode [CmmStatic] }
return [CmmStaticLit (getLit e)] }
| type ';' { return [CmmUninitialised
(machRepByteWidth $1)] }
| 'bits8' '[' ']' STRING ';' { return [CmmString $4] }
| 'bits8' '[' ']' STRING ';' { return [mkString $4] }
| 'bits8' '[' INT ']' ';' { return [CmmUninitialised
(fromIntegral $3)] }
| typenot8 '[' INT ']' ';' { return [CmmUninitialised
......@@ -427,6 +428,9 @@ section "rodata" = ReadOnlyData
section "bss" = UninitialisedData
section s = OtherSection s
mkString :: String -> CmmStatic
mkString s = CmmString (map (fromIntegral.ord) s)
-- mkMachOp infers the type of the MachOp from the type of its first
-- argument. We assume that this is correct: for MachOps that don't have
-- symmetrical args (e.g. shift ops), the first arg determines the type of
......
......@@ -45,6 +45,7 @@ import Data.Bits ( shiftR )
import Char ( ord, chr )
import IO ( Handle )
import DATA_BITS
import Data.Word ( Word8 )
#ifdef DEBUG
import PprCmm () -- instances only
......@@ -881,25 +882,21 @@ machRepSignedCType r | r == wordRep = ptext SLIT("I_")
-- ---------------------------------------------------------------------
-- print strings as valid C strings
-- Assumes it contains only characters '\0'..'\xFF'!
pprFSInCStyle :: FastString -> SDoc
pprFSInCStyle fs = pprStringInCStyle (unpackFS fs)
pprStringInCStyle :: String -> SDoc
pprStringInCStyle :: [Word8] -> SDoc
pprStringInCStyle s = doubleQuotes (text (concatMap charToC s))
charToC :: Char -> String
charToC '\"' = "\\\""
charToC '\'' = "\\\'"
charToC '\\' = "\\\\"
charToC c | c >= ' ' && c <= '~' = [c]
| c > '\xFF' = panic ("charToC "++show c)
| otherwise = ['\\',
charToC :: Word8 -> String
charToC w =
case chr (fromIntegral w) of
'\"' -> "\\\""
'\'' -> "\\\'"
'\\' -> "\\\\"
c | c >= ' ' && c <= '~' -> [c]
| otherwise -> ['\\',
chr (ord '0' + ord c `div` 64),
chr (ord '0' + ord c `div` 8 `mod` 8),
chr (ord '0' + ord c `mod` 8)]
-- ---------------------------------------------------------------------------
-- Initialising static objects with floating-point numbers. We can't
-- just emit the floating point number, because C will cast it to an int
......
......@@ -51,6 +51,7 @@ import FastString ( mkFastString )
import Data.List ( intersperse, groupBy )
import IO ( Handle )
import Maybe ( isJust )
import Data.Char ( chr )
pprCmms :: [Cmm] -> SDoc
pprCmms cmms = pprCode CStyle (vcat (intersperse separator $ map ppr cmms))
......@@ -391,7 +392,8 @@ pprStatic s = case s of
CmmUninitialised i -> nest 4 $ text "I8" <> brackets (int i)
CmmAlign i -> nest 4 $ text "align" <+> int i
CmmDataLabel clbl -> pprCLabel clbl <> colon
CmmString s' -> nest 4 $ text "I8[]" <+> doubleQuotes (text s')
CmmString s' -> nest 4 $ text "I8[]" <+>
doubleQuotes (text (map (chr.fromIntegral) s'))
-- --------------------------------------------------------------------------
-- Registers, whether local (temps) or global
......
......@@ -43,7 +43,7 @@ import MachOp
import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr )
import CLabel ( mkCCLabel, mkCCSLabel, mkRtsDataLabel )
import Module ( moduleUserString )
import Module ( moduleString )
import Id ( Id )
import CostCentre
import StgSyn ( GenStgExpr(..), StgExpr )
......@@ -292,7 +292,7 @@ emitCostCentreDecl
-> Code
emitCostCentreDecl cc = do
{ label <- mkStringCLit (costCentreUserName cc)
; modl <- mkStringCLit (moduleUserString (cc_mod cc))
; modl <- mkStringCLit (moduleString (cc_mod cc))
; let
lits = [ zero, -- StgInt ccID,
label, -- char *label,
......
......@@ -54,11 +54,12 @@ import ListSetOps ( assocDefault )
import Util ( filterOut, sortLe )
import DynFlags ( DynFlags(..), HscTarget(..) )
import Packages ( HomeModules )
import FastString ( LitString, FastString, unpackFS )
import FastString ( LitString, FastString, bytesFS )
import Outputable
import Char ( ord )
import DATA_BITS
import DATA_WORD ( Word8 )
import Maybe ( isNothing )
-------------------------------------------------------------------------
......@@ -77,7 +78,8 @@ addIdReps ids = [(idCgRep id, id) | id <- ids]
-------------------------------------------------------------------------
cgLit :: Literal -> FCode CmmLit
cgLit (MachStr s) = mkStringCLit (unpackFS s)
cgLit (MachStr s) = mkByteStringCLit (bytesFS s)
-- not unpackFS; we want the UTF-8 byte stream.
cgLit other_lit = return (mkSimpleLit other_lit)
mkSimpleLit :: Literal -> CmmLit
......@@ -308,10 +310,13 @@ emitRODataLits lbl lits
mkStringCLit :: String -> FCode CmmLit
-- Make a global definition for the string,
-- and return its label
mkStringCLit str
mkStringCLit str = mkByteStringCLit (map (fromIntegral.ord) str)
mkByteStringCLit :: [Word8] -> FCode CmmLit
mkByteStringCLit bytes
= do { uniq <- newUnique
; let lbl = mkStringLitLabel uniq
; emitData ReadOnlyData [CmmDataLabel lbl, CmmString str]
; emitData ReadOnlyData [CmmDataLabel lbl, CmmString bytes]
; return (CmmLabel lbl) }
-------------------------------------------------------------------------
......
......@@ -69,7 +69,7 @@ import StaticFlags ( opt_SccProfilingOn, opt_OmitBlackHoling,
import Id ( Id, idType, idArity, idName )
import DataCon ( DataCon, dataConTyCon, isNullaryRepDataCon, dataConName )
import Name ( Name, nameUnique, getOccName, getOccString )
import OccName ( occNameUserString )
import OccName ( occNameString )
import Type ( isUnLiftedType, Type, repType, splitTyConApp_maybe )
import TcType ( tcSplitSigmaTy )
import TyCon ( isFunTyCon, isAbstractTyCon )
......@@ -930,12 +930,12 @@ closureValDescr, closureTypeDescr :: ClosureInfo -> String
closureValDescr (ClosureInfo {closureDescr = descr})
= descr
closureValDescr (ConInfo {closureCon = con})
= occNameUserString (getOccName con)
= occNameString (getOccName con)
closureTypeDescr (ClosureInfo { closureType = ty })
= getTyDescription ty
closureTypeDescr (ConInfo { closureCon = data_con })
= occNameUserString (getOccName (dataConTyCon data_con))
= occNameString (getOccName (dataConTyCon data_con))
getTyDescription :: Type -> String
getTyDescription ty
......
......@@ -18,7 +18,8 @@ import DsUtils ( EquationInfo(..), MatchResult(..),
import MatchLit ( tidyLitPat, tidyNPat )
import Id ( Id, idType )
import DataCon ( DataCon, dataConTyCon, dataConOrigArgTys, dataConFieldLabels )
import Name ( Name, mkInternalName, getOccName, isDataSymOcc, getName, mkVarOcc )
import Name ( Name, mkInternalName, getOccName, isDataSymOcc,
getName, mkVarOccFS )
import TysWiredIn
import PrelNames ( unboundKey )
import TyCon ( tyConDataCons, tupleTyConBoxity, isTupleTyCon )
......@@ -382,7 +383,7 @@ make_row_vars used_lits (_, EqnInfo { eqn_pats = pats})
new_var = hash_x
hash_x = mkInternalName unboundKey {- doesn't matter much -}
(mkVarOcc FSLIT("#x"))
(mkVarOccFS FSLIT("#x"))
noSrcLoc
make_row_vars_for_constructor :: (EqnNo, EquationInfo) -> [WarningPat]
......
......@@ -28,9 +28,8 @@ import SMRep ( argMachRep, typeCgRep )
import CoreUtils ( exprType, mkInlineMe )
import Id ( Id, idType, idName, mkSysLocal, setInlinePragma )
import Literal ( Literal(..), mkStringLit )
import Module ( moduleString )
import Module ( moduleFS )
import Name ( getOccString, NamedThing(..) )
import OccName ( encodeFS )
import Type ( repType, coreEqType )
import TcType ( Type, mkFunTys, mkForAllTys, mkTyConApp,
mkFunTy, tcSplitTyConApp_maybe,
......@@ -146,7 +145,7 @@ dsFImport id (CImport cconv safety header lib spec)
= dsCImport id spec cconv safety no_hdrs `thenDs` \(ids, h, c) ->
returnDs (ids, h, c, if no_hdrs then Nothing else Just header)
where
no_hdrs = nullFastString header
no_hdrs = nullFS header
-- FIXME: the `lib' field is needed for .NET ILX generation when invoking
-- routines that are external to the .NET runtime, but GHC doesn't
......@@ -246,7 +245,7 @@ dsFCall fn_id fcall no_hdrs
the_ccall_app = mkFCall ccall_uniq fcall val_args ccall_result_ty
work_rhs = mkLams tvs (mkLams work_arg_ids the_ccall_app)
work_id = setImpInline no_hdrs $ -- See comments with setImpInline
mkSysLocal (encodeFS FSLIT("$wccall")) work_uniq worker_ty
mkSysLocal FSLIT("$wccall") work_uniq worker_ty
-- Build the wrapper
work_app = mkApps (mkVarApps (Var work_id) tvs) val_args
......@@ -356,7 +355,7 @@ dsFExportDynamic id cconv
getModuleDs `thenDs` \ mod_name ->
let
-- hack: need to get at the name of the C stub we're about to generate.
fe_nm = mkFastString (moduleString mod_name ++ "_" ++ toCName fe_id)
fe_nm = mkFastString (unpackFS (zEncodeFS (moduleFS mod_name)) ++ "_" ++ toCName fe_id)
in
newSysLocalDs arg_ty `thenDs` \ cback ->
dsLookupGlobalId newStablePtrName `thenDs` \ newStablePtrId ->
......
......@@ -30,16 +30,16 @@ import qualified Language.Haskell.TH as TH
import HsSyn
import Class (FunDep)
import PrelNames ( rationalTyConName, integerTyConName, negateName )
import OccName ( isDataOcc, isTvOcc, occNameUserString )