Commit f5ffd8d9 authored by Ben Gamari's avatar Ben Gamari 🐢 Committed by Marge Bot

base: Expose GHC.Unicode.unicodeVersion

This exposes a Data.Version.Version representing the version of the
Unicode database used by `base`. This should clear up some confusion I
have seen in tickets about which Unicode versions a given GHC can be
expected to work with.

While in town I also regenerated (but did not update) the Unicode
tables from version 12.0.0 of the Unicode database. Strangely, the file
cited in the README no longer existed. Consequently, I used
https://www.unicode.org/Public/12.0.0/ucd/UnicodeData.txt instead and was
slightly surprised to find that this produced a few changes.
parent 059c3c9d
{-# LANGUAGE NoImplicitPrelude #-}
-- Boot interface for "Data.Version": it declares only the names that
-- "GHC.Unicode" uses when it imports this module with a SOURCE pragma.
module Data.Version
( Version
, makeVersion
) where
import GHC.Base
-- Abstract in this file: no constructors or fields are declared here.
data Version
-- Builds a Version from a list of Ints (presumably the version
-- components, most significant first; confirm against the full module).
makeVersion :: [Int] -> Version
......@@ -19,6 +19,7 @@
-----------------------------------------------------------------------------
module GHC.Unicode (
unicodeVersion,
GeneralCategory (..), generalCategory,
isAscii, isLatin1, isControl,
isAsciiUpper, isAsciiLower,
......@@ -36,12 +37,18 @@ import GHC.Real
import GHC.Enum ( Enum (..), Bounded (..) )
import GHC.Ix ( Ix (..) )
import GHC.Num
import {-# SOURCE #-} Data.Version
-- Data.Char.chr already imports this and we need to define a Show instance
-- for GeneralCategory
import GHC.Show ( Show )
#include "HsBaseConfig.h"
#include "UnicodeVersion.h"
-- | Version of the Unicode standard used by @base@.
--
-- The version components are not written out here; they are supplied by a
-- CPP macro from the generated header UnicodeVersion.h, which is included
-- just above this definition.
unicodeVersion :: Version
unicodeVersion = makeVersion UNICODE_VERSION_NUMS
-- | Unicode General Categories (column 2 of the UnicodeData table) in
-- the order they are listed in the Unicode standard (the Unicode
......
Generating GHC's Unicode table
==============================
WCsubst.c is generated with:
sh ubconfc < UnicodeData.txt > WCsubst.c
sh ubconfc 12.0.0 < UnicodeData.txt > WCsubst.c
where UnicodeData.txt came from
https://www.unicode.org/Public/12.0.0/ucd/UnicodeData-12.0.0d4.txt
https://www.unicode.org/Public/12.0.0/ucd/UnicodeData.txt
Don't forget to mention the update in the User's Guide.
/*-------------------------------------------------------------------------
This is an automatically generated file: do not edit
Generated by ubconfc at Tue Aug 14 10:04:18 UTC 2018
Generated by ubconfc at Mon Feb 10 11:42:08 EST 2020
@generated
-------------------------------------------------------------------------*/
......@@ -90,7 +90,7 @@ struct _charblock_
#define GENCAT_MN 2097152
#define GENCAT_LO 16384
#define MAX_UNI_CHAR 1114109
#define NUM_BLOCKS 3349
#define NUM_BLOCKS 3352
#define NUM_CONVBLOCKS 1326
#define NUM_SPACEBLOCKS 7
#define NUM_LAT1BLOCKS 63
......@@ -1485,7 +1485,8 @@ static const struct _charblock_ allchars[]={
{5112, 6, &rule110},
{5120, 1, &rule7},
{5121, 620, &rule14},
{5741, 2, &rule2},
{5741, 1, &rule13},
{5742, 1, &rule2},
{5743, 17, &rule14},
{5760, 1, &rule1},
{5761, 26, &rule14},
......@@ -2799,8 +2800,8 @@ static const struct _charblock_ allchars[]={
{43444, 2, &rule124},
{43446, 4, &rule92},
{43450, 2, &rule124},
{43452, 1, &rule92},
{43453, 4, &rule124},
{43452, 2, &rule92},
{43454, 3, &rule124},
{43457, 13, &rule2},
{43471, 1, &rule91},
{43472, 10, &rule8},
......@@ -3302,14 +3303,14 @@ static const struct _charblock_ allchars[]={
{71935, 1, &rule14},
{72096, 8, &rule14},
{72106, 39, &rule14},
{72145, 1, &rule124},
{72146, 1, &rule92},
{72147, 1, &rule124},
{72145, 3, &rule124},
{72148, 4, &rule92},
{72154, 2, &rule92},
{72156, 4, &rule124},
{72160, 1, &rule92},
{72161, 3, &rule14},
{72161, 1, &rule14},
{72162, 1, &rule2},
{72163, 1, &rule14},
{72164, 1, &rule124},
{72192, 1, &rule14},
{72193, 10, &rule92},
......@@ -3545,7 +3546,8 @@ static const struct _charblock_ allchars[]={
{123184, 7, &rule92},
{123191, 7, &rule91},
{123200, 10, &rule8},
{123214, 2, &rule14},
{123214, 1, &rule14},
{123215, 1, &rule13},
{123584, 44, &rule14},
{123628, 4, &rule92},
{123632, 10, &rule8},
......@@ -3556,6 +3558,7 @@ static const struct _charblock_ allchars[]={
{125184, 34, &rule203},
{125218, 34, &rule204},
{125252, 7, &rule92},
{125259, 1, &rule91},
{125264, 10, &rule8},
{125278, 2, &rule2},
{126065, 59, &rule17},
......
......@@ -17,6 +17,26 @@
# Output the file header
# The Unicode version (e.g. 12.0.0) is a required first argument; it is
# recorded in the generated header below.
VERSION="$1"
if [ -z "$VERSION" ]; then
    # Diagnostics go to stderr: stdout may already be redirected into the
    # generated C file by the caller, where the message would be lost.
    echo "Usage: $0 <unicode version>" >&2
    exit 1
fi

# The version is spliced into a Haskell list literal below, so reject
# anything other than digits and dots before writing any file.
case "$VERSION" in
    *[!0-9.]*)
        echo "$0: invalid Unicode version '$VERSION' (expected e.g. 12.0.0)" >&2
        exit 1
        ;;
esac

# This file is #included from GHC.Unicode and is used to define
# GHC.Unicode.unicodeVersion.
# Expansions of $0 are quoted so the script also works when it lives under
# a path containing whitespace.
cat > "$(dirname "$0")/../include/UnicodeVersion.h" <<EOF
#if 0
This is an automatically generated file: do not edit
Generated by $(basename "$0") at $(date)
@generated
#endif
#define UNICODE_VERSION_NUMS [$(echo "$VERSION" | sed 's/\./,/g')]
EOF

# From here on, everything written to stdout goes into WCsubst.c, which
# sits next to this script.
exec > "$(dirname "$0")/WCsubst.c"
echo "/*-------------------------------------------------------------------------"
echo "This is an automatically generated file: do not edit"
echo "Generated by `basename $0` at `date`"
......
#if 0
This is an automatically generated file: do not edit
Generated by ubconfc at Mon Feb 10 11:42:08 EST 2020
@generated
#endif
#define UNICODE_VERSION_NUMS [12,0,0]
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment