pkg-spec.sgml 55.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE ARTICLE PUBLIC "-//OASIS//DTD DocBook V3.1//EN" [
<!entity  ghc         "<application>GHC</application>">
<!entity  nhc         "<application>NHC</application>">
<!entity  hugs        "<application>Hugs</application>">
<!entity  autoconf    "<application>autoconf</application>">
<!entity  impl        "Haskell Implementation">
<!entity  impls       "Haskell Implementations">
<!entity  distMod     "<className>Distribution</className> module">
<!entity  haskellConfig  "<application>haskell-config</application>">
<!entity  DistPackage  "<classname>Distribution.Package</classname>">
<!entity  DistBuild    "<classname>Distribution.Build</classname>">
<!entity  DistInstall  "<classname>Distribution.Install</classname>">
<!entity  DistConfigure   "<classname>Distribution.Config</classname>">
<!entity  hunit       "<application>HUnit</application>">
<!entity  distutils   "<application>Distutils</application>">
<!entity  cpan        "<application>CPAN</application>">
<!entity  xemacs      "<application>XEmacs</application>">
<!entity  hmake       "<application>hmake</application>">
<!entity  dpkg        "<application>dpkg</application>">
<!entity  rpm        "<application>rpm</application>">
]>
 
<!-- You should search this document for 'foo' and delete it. -->
<!-- Look for FIX too -->

<!-- ToDo: -->

<!-- Look at Package data types again and add the version information where -->
<!-- necessary. -->

<article id="lip">
  <artHeader>
    <date>2003-09-12</date>
    <title>The Haskell Package System</title>
    <author>
      <firstname>Isaac</firstname>
      <surname>Jones</surname>
    </author>
    <author>
      <firstname>Simon</firstname>
      <surname>Peyton Jones</surname>
    </author>
    <author>
      <firstname>Simon</firstname>
      <surname>Marlow</surname>
    </author>
    <author>
      <firstname>Malcolm</firstname>
      <surname>Wallace</surname>
    </author>

    <address><email>ijones@syntaxpolice.org</email></address>
    <abstract>

      <para>The Haskell Library Infrastructure Project is an effort to provide
      a framework for developers to more effectively contribute their
      software to the Haskell community.  This document specifies
      the <emphasis>Haskell Package System (HPS)</emphasis>, which contributes 
	to the goals of the Haskell Library Infrastructure Project.
      </para>
      <para>Specifically, the HPS describes what a Haskell 
	package is, how these packages interact with the language,
	and what Haskell implementations must do to support packages.
	The HPS also specifies some infrastructure (code) that makes
	it easy for library authors to build and distribute conforming packages.
	</para>
      <para>The HPS is only one contribution to the Library Infrastructure project.
	In particular, the HPS says nothing about more global issues such as
	how authors decide where in the module name space their library should live;
	how users can find a package they want; how orphan packages find new owners;
	and so on.</para>
      <para> The HPS has been discussed by the implementors of GHC, nhc, and Hugs,
	all of whom are prepared to implement it.  The proposal is now open
	for wider debate. Please contribute by emailing libraries@haskell.org.</para>
    </abstract>
  </artheader>


<!-- Solution ------------------------------------------------- -->

<sect1 id=hli-goals><title>The Haskell Package System: goals</title>

<para>The Haskell Package System (HPS) has the following main goal:
to specify a standard way in which a Haskell library can be packaged, so that it is
easy for consumers to use it, or re-package it, 
regardless of the Haskell implementation or installation platform.</para>  

  <note><para>Would some name other than "Haskell Package System" be better?</para></note>

<para>The HPS also
supports library authors by providing an infrastructure that automates the
process of building and packaging simple libraries.  It is not necessary to use
this code --- indeed complex libraries may exceed its abilities --- but it should
handle many cases with no trouble.</para>

<sect2><title>Dramatis personae</title>

<para>The HPS serves a number of different people:
<itemizedlist>
<listitem><para>
 <emphasis>Joe User</emphasis> is simply a Haskell user.  He does not download new packages.  Nevertheless,
he needs to know about his Haskell compiler's <function>-package</function> flag (see <xref linkend="compiler-reqts">).
	    </para>
	  </listitem>
<listitem><para>
 <emphasis>Bob the Builder</emphasis> and <emphasis>Sam Sysadmin</emphasis> both download, build,
  and install new packages. The only difference between the two is that Sam has root permission,
  and can install packages in more globally-visible places.
	    </para>
	  </listitem>
<listitem><para>
 <emphasis>Roland RPM</emphasis>, <emphasis>Donald Debian</emphasis>, and <emphasis>Willie Windows</emphasis> 
  build Linux RPM, Debian, and Windows installer packages respectively.  (This list is not exhaustive.)
  They do this as a service for Angela Author and the community, and may know little or nothing about the internal details 
  of the Haskell packages they are wrapping up.
	    </para>
	  </listitem>
<listitem><para>
 <emphasis>Angela Author</emphasis> wants to write a simple Haskell library, and distribute it with 
minimum fuss, in such a way that all the above folk can easily use it.
	    </para>
	  </listitem>
<listitem><para>
 <emphasis>Marcus Makefile</emphasis> is like Angela, but more sophisticated.  He has a complicated library,
and uses makefiles.  Still, he wants to arrange that Roland, Donald, Bob, Sam, and Joe don't need to know
about his internal complexity.
	    </para>
	  </listitem>
	</itemizedlist>
We describe Angela and Marcus as <emphasis>producers</emphasis> of their packages, and all the others
as package <emphasis>consumers</emphasis>.  
      </para>
    </sect2>

<sect2 id="example"><title>An example</title>

<para>To give the idea, here is a simple example.  Angela has written a couple of Haskell modules that
implement sets and bags; she wants to distribute them to Bob as a package called,
say, <literal>angela-coll</literal>.  Let's say that the modules are
<literal>Data.Set</literal>, <literal>Data.Bag</literal>, <literal>Angela.Internals</literal>.
(The HPS says nothing about how Angela decides where in the name space to put her modules.)
Angela only wants to expose the first two to Bob; the <literal>Angela.Internals</literal> module is (as its
name suggests) internal to the package.
</para>
<para>Angela decides to use the simple build infrastructure that the HPS provides.  She is working in
a directory <filename>~/coll</filename>.  In there she puts the modules, in sub-directories
driven by their module name: <filename>~/coll/Data/Set.hs</filename>,
 <filename>~/coll/Data/Bag.hs</filename>,
 and <filename>~/coll/Angela/Internals.hs</filename>.
Next, she writes
a <emphasis>package description</emphasis>, which she puts in <filename>~/coll/pkg.desc</filename>:
<programlisting>
    name:     angela-coll
    version:  1
</programlisting>
She also creates a two-line Haskell file <filename>~/coll/Setup.lhs</filename> as follows:
<programlisting>
  #! runhugs
  > import Distribution.Simple( main )
</programlisting>
The first line arranges that when Angela executes <filename>Setup.lhs</filename> as a shell script,
the shell will invoke <command>runhugs</command>, which will in turn run <literal>main</literal> imported
from the library <literal>Distribution.Simple</literal>.  This library implements the HPS simple build infrastructure.
</para>
<para> Now she is ready to go. She types:
<programlisting>
  ./Setup.lhs configure --ghc
  ./Setup.lhs build
  ./Setup.lhs src-dist
</programlisting>
The first line readies the system to build the library using GHC; for example, it checks that GHC exists on the system.
The second line checks that
the library does indeed build flawlessly.  (At this point she can write and execute tests, as we discuss later.)
The third line wraps up the package as a source distribution, making the file <filename>~/coll/angela-coll-1.tar.gz</filename>.
</para>
<para>
Angela emails the tar file to Bob, who untars it into <filename>tmp/coll</filename>.
He <command>cd</command>'s to that directory and types
<programlisting>
  ./Setup.lhs configure --ghc
  ./Setup.lhs build
  ./Setup.lhs install
</programlisting>
He's all done. Now in his Haskell programs, Bob can simply <literal>import</literal> the new
modules <literal>Data.Set</literal> and <literal>Data.Bag</literal>. He does not need to give extra flags
to GHC to tell it to look for Angela's modules; they are there automatically.
If Angela used the same module names as someone else, Bob may need finer control: see <xref linkend="compiler-reqts">.</para>
<para>
If Angela wrote her modules in Haskell 98, Bob could also have said <option>--hugs</option> or
<option>--nhc</option> in his <option>configure</option> line, and the package would have been
built and installed for those compilers instead.
      </para>
    </sect2>
  </sect1>


<sect1 id=hli-overview><title>The Haskell Package System: overview</title>

<para>This section summarises the vocabulary and main features of the Haskell Package System.</para>

<sect2 id="package-descr"><title>Packages</title>

<para>A <emphasis>package</emphasis> is the unit of distribution for the HPS.
Its purpose in life, when installed, is to make available some Haskell modules for
import by some other Haskell program.   However, a package may consist of 
much more than a bunch of Haskell
modules: it may also have C source code and header files, documentation, test cases, auxiliary 
tools and whatnot.</para>

<para>
Each package has:
<itemizedlist>
<listitem><para>A globally-unique <emphasis>package name</emphasis>, containing no spaces. Chaos will result
if two distinct packages with the same name are installed on the same system.  How unique package names are
handed out is not part of this specification, but there will presumably be some global web site where
package authors can go to register a package name. </para>
</listitem>
<listitem><para>A <emphasis>version</emphasis>, consisting of a sequence of one or more integers.
</para></listitem>

<listitem><para>
<emphasis>A list of explicit dependencies</emphasis> on other packages.  These are typically not exact; e.g. "I need <function>hunit</function> version
greater than 2.4".
	    </para>
	  </listitem>
<listitem><para>
<emphasis>A list of exposed modules</emphasis>.  Not all of the modules that comprise a package implementation are necessarily
exposed to a package client.
The ability to expose some, but not all, of the modules making up a package
is rather like using an explicit export list on a Haskell module.</para>
	  </listitem>
</itemizedlist>
The first two components can be combined to form a single text string called the <emphasis>package ID</emphasis>, 
using a hyphen to separate the version
from the name, and dots to separate the version components.  For example, "hunit-2.3".
</para>

</sect2>

<sect2 id="packages-and-haskell"><title>Packages and the Haskell language</title>
<para>A complete Haskell program will consist of one or more modules (including <literal>Main</literal>)
compiled against one or more packages (of which the Prelude is one).  These packages are not referred to
explicitly in the Haskell source; instead, the packages simply populate the hierarchical space of module names.
</para>
<para>Complete programs must obey the following invariant.  <emphasis>Consider all the Haskell modules that
constitute a complete program: no two modules must have the same module name.</emphasis></para>

<para>This invariant is conservative.  It preserves the existing semantics of Haskell, and is 
relatively easy to implement.  In particular, the full name of an entity (type, class, function), which is
used to determine when two entities are the same, is simply a pair of the module name and the entity name.</para>

<para>The invariant is unsatisfactory, however, because it does not support abstraction at
the package level. For example, a package with an internal (hidden, non-exposed) module called <literal>Foo</literal>
cannot be used in the same program as another package with an unrelated internal module also called <literal>Foo</literal>.
Nor can a program use two packages, P and Q, which depend on different versions of the same underlying package R.
We considered more sophisticated schemes, in which (for example) the package name, or package ID, is implicitly
made part of every module name.  But (a) there is a big design space, and (b) it places new requirements on the
implementations.  Hence a conservative starting point.</para>
    </sect2>

<sect2><title>Packages and compilers</title>

<para>We use the term ``compiler'' to mean GHC, Hugs, nhc, hbc, etc.  (Even though
Hugs isn't really a compiler, the term is less clumsy than ``Haskell implementation''.)</para>

<para> The
HPS requires that a conforming Haskell compiler is somewhat package aware.
In summary, the requirements are these:
<itemizedlist>
<listitem><para>Each compiler <function>hc</function> must provide an associated package-management
program <function>hc-pkg</function>.  A compiler user installs a package by placing the package's 
supporting files somewhere, and then using <function>hc-pkg</function> to make the compiler aware
of the new package.  This step is called <emphasis>registering the package with the compiler</emphasis>.
</para></listitem>
<listitem><para>To register a package, <function>hc-pkg</function> takes as input
an <emphasis>installed package description (IPD)</emphasis>, 
which describes the installed form of the package in detail.  
The format of an IPD is given in <xref linkend="ipd">.</para>
	  </listitem>

<listitem><para>Subsequent invocations of <function>hc</function> will include modules from the
new package in the module name space (i.e. visible to <function>import</function> statements).
</para></listitem>
<listitem><para>
The compiler should support a <function>-package</function> flag for finer-grain control
of package visibility.
</para></listitem>
</itemizedlist>
A complete specification of these requirements is given in <xref linkend="compiler-reqts">.
</para> 
</sect2>

<sect2><title>Package distributions</title>

<para>A HPS package can be distributed in several forms:
<itemizedlist>
<listitem><para>
A <emphasis>HPS source distribution</emphasis> is a tree of files (tar-ball, zip file etc)
containing the library sources, which may need to be
compiled before being installed.  The same source tarball may well be installable for several
Haskell implementations, OSs, and platforms.
	    </para>
<para>A source distribution may contain fewer files than appear in the developer's CVS repository; for example,
tests and design notes may be omitted.  It may also contain some derived files, that do not appear in the
developer's repository; for example, ones made by a somewhat exotic pre-processor where it
seems simpler to ship the derived file than to ensure that all consumers have the pre-processor.</para></listitem>
<listitem><para>
A <emphasis>HPS binary distribution</emphasis> is a tree of files that contains a pre-compiled library, ready
for installation.  The pre-compilation means that the distribution will be Haskell-compiler-specific, and certain 
"looser" dependencies (<literal>hunit > 2.3</literal>) will now be precisely fixed (<literal>hunit == 2.4</literal>).
	    </para>
	  </listitem>

<listitem><para>
The package may be wrapped up as an <emphasis>RPM</emphasis>, <emphasis>Debian</emphasis> distribution, 
or <emphasis>Windows installer</emphasis>.  (This list is not exhaustive.)
In that case, the way it is installed is prescribed by the respective distribution mechanism;
the only role of the HPS is to make it easy to construct such distributions. All three are
compiler-specific binary distributions.
	    </para>
	  </listitem>
	</itemizedlist></para>
    </sect2>

<sect2><title>The Setup script</title>

<para>The key question is this: how should Angela Author present her HPS package so that 
her consumers (Bob, Sam, Willie, etc) can conveniently use it?</para>

<para>Answer: she provides a tree of files, with two specific files in the
root directory of the tree:  
<itemizedlist>
<listitem><para><function>pkg.desc</function> contains a short description of the package:
specifically, the package name, version, and dependencies.  It may also contain further information
specific to the particular build system.  The syntax of the package description file
is given in <xref linkend="pkg-desc">.
	    </para>
	  </listitem>
<listitem><para>
<function>Setup.lhs</function> is an executable Haskell program 
which conforms to a particular specification, given in detail in <xref linkend="setup">.
In summary, though, <function>Setup.lhs</function> allows a consumer to configure, build,
test, install, register, and un-register a package.  </para>
	  </listitem>
	</itemizedlist>
The HPS allows a package author to write the setup script in any way she pleases, provided
it conforms to the specification of <xref linkend="setup">.  However, many Haskell packages consist
of little more than a bunch of Haskell modules, and for these the HPS provides <emphasis>the simple
build infrastructure</emphasis>, a Haskell library that does
all the work.  The simple build infrastructure, which was used for the example in 
<xref linkend="example">, is described in <xref linkend="sbi">.</para>

<!-- Why Haskell ---------------------------- -->

<para>In principle, the <function>Setup</function> script
could be written in any language; so why do we use Haskell?
<itemizedList>

<listItem><para>Haskell runs on all the systems of interest.</para></listItem>

<listItem><para>Haskell's standard libraries should include a rich set of operating
system operations needed for the task.  These can abstract-away the
differences between systems in a way that is not possible for
Make-based tools.</para></listItem>

<listItem><para>Haskell is a great language for many things, including tasks
typically relegated to languages like Python.  Building, installing,
and managing packages is a perfect proving ground for these tasks, and
can help us to discover weaknesses in Haskell or its libraries that
prevent it from breaking into this "market".  A positive side-effect
of this project might be to make Haskell more suitable for "scripting"
tasks.</para></listItem>

<listItem><para>Likewise, each piece of the project (Building, Installing, and
Packaging) can be leveraged elsewhere if we make them into
libraries.</para></listItem>

<listItem><para>Make is not particularly good for parsing, processing, and sharing
meta-information about packages.  The availability of this information
to Haskell systems (including compilers, interpreters, and other
tools) is useful.  Unlike Make, Haskell can also reuse unrelated
algorithms, parsers, and other libraries that have been developed in
the past.</para></listItem>

<listItem><para><emphasis>Dogfooding</emphasis>, the act of using the tools you
develop, is a healthy policy.</para></listItem>

</itemizedList>
It is convenient for consumers to execute <function>Setup.lhs</function> directly, thus:
<programlisting>
  ./Setup.lhs ...
</programlisting>
This can be achieved by starting <function>Setup.lhs</function> with "<function>#! runhugs</function>"
or "<function>#! runghc</function>".
Since it's a literate Haskell script (<function>.lhs</function> file), the Haskell compiler will ignore
this line.
However, nothing stops a consumer from running the script interactively, or compiling it and running 
the compiled binary.  Any implementation of Haskell should suffice to run the script, provided 
the implementation has the HPS libraries installed.  
	</para>
      </sect2>
  </sect1>

<!-- COMPILER REQUIREMENTS ---------------------------- -->

<sect1 id="compiler-reqts"><title>What the compilers must implement</title>

<para>The HPS requires that the Haskell implementations be somewhat package-aware.
This section documents those requirements.</para>

<sect2><title>Building and registering a package</title>

<para>Installing a package ultimately involves these steps:
<itemizedlist>
<listitem> <para>
 <emphasis>Compiling the source files</emphasis>, by invoking the compiler.  Even Hugs may require 
     some processing (e.g running cpp).
</para></listitem>

<listitem> <para>
  <emphasis>Copying the compiled files into some permanent place</emphasis>.  Typically the compiler
     places no pre-conditions on where "some place" is; instead one
     usually follows the conventions of the host operating system.
</para></listitem>

<listitem> <para>
  <emphasis>Registering the package</emphasis>: telling the compiler about the 
     existence of the package, and where its files are.
  To register the package one invokes a compiler-specific program <function>hc-pkg</function> (i.e. <function>ghc-pkg</function>, 
  <function>hugs-pkg</function> etc), passing it an <emphasis>installed package description (IPD)</emphasis>
  describing the package.  The format of an IPD is given in <xref linkend="ipd">.
</para></listitem>
</itemizedlist>
</para>
<para>It must be possible to register many versions of the same package.</para>

<sect3><title>Shared packages and user packages</title>

<para>
A package can be registered either as a <emphasis>shared package</emphasis> or as a <emphasis>user package</emphasis>.
The former means that anyone invoking <function>hc</function> will see the new package.  The latter means
that only the user who installed the package will see it.
</para>
<para>
User packages <emphasis>shadow</emphasis> shared packages, in the following sense:
	  <itemizedlist><listitem><para>
		A Haskell <function>import</function> for module M will seek M in a user package first.
	      </para>
	    </listitem><listitem><para>
		The <function>hc-pkg</function> commands that take package IDs will look for a user package first.
	      </para>
	    </listitem>
	  </itemizedlist></para>
<note><para>Hmm.  If there are several ghc's around, can their user packages get mixed up?  I hope not.</para></note>

</sect3>

<sect3><title>Exposed packages and hidden packages</title>

<para>
An installed package can be <emphasis>exposed</emphasis> or <emphasis>hidden</emphasis>.  An exposed package
populates the module name space, while a hidden package does not.  Hidden packages are nevertheless necessary.
For example, the user might use
package A-2.1 and B-1.0; 
but B-1.0 might depend on A-1.9. So the latter must be installed (else B-1.0 could not be installed), but
should be hidden, so that user imports see A-2.1. (However, note that the 
whole-program invariant described in <xref linkend="packages-and-haskell"> implies that a program using B-1.0 could
not also use A-2.1, because then both A-2.1 and A-1.9 would form part of the program, and they almost certainly 
use the same module names.)
</para>
<para>The registration program <function>hc-pkg</function> provides operations to expose or hide an 
already-installed package.  By default, installing a package installs it exposed, and hides any
existing installed package of the same name (and presumably different version).
	  </para>
<para>Hiding or exposing a package is an operation that can be performed, by <function>hc-pkg</function>,
on any package.  It is quite distinct from the question of which modules in a package are hidden or
exposed (see <xref linkend="package-descr">), which is a property of the package itself.  Only the exposed
modules of an exposed package populate the module name space seen by a Haskell <literal>import</literal> statement.
</para>
</sect3>

<sect3><title>Registration invariants</title>

<para>The registration program <function>hc-pkg</function> checks the following invariants:
<itemizedlist>
<listitem> <para>
Before registering a package P, check that all the packages that P depends on are already registered.
If P is being registered as a shared package, P's dependencies must also be shared packages.
</para></listitem>
<listitem> <para>
Before registering an exposed user package P, check that the modules that are exposed by P do not have the same
names (in the hierarchical module name space) as any other module in an exposed user package Q.  Similarly
for shared packages.  (However, a shared package may expose a module with the same name as a user package.)
</para></listitem>
<listitem> <para>
Before un-registering a package P, check that no package that depends on P is registered.
The exception is that when un-registering a shared package, <function>hc-pkg</function> cannot
check that no user has a user package depending on P.</para>
	    </listitem>
</itemizedlist>
	</para>
      </sect3>
    </sect2>

<sect2><title>The <function>-package</function> compiler flag</title>

<para>
By default, the module namespace is populated only by the exposed modules of exposed packages.
This can be overridden using the <function>-package</function> flag, which temporarily exposes a
particular package, hiding any other packages of the same name.</para>
<note><para>Question: what does <function>-package hunit-1.2 -package hunit-1.3</function> do?
Does the second override the first?  Or are they both in scope?  I think the former.</para>
<para>Question: what if the -package flag breaks the registration invariants?  For example, the
newly-exposed package might expose a module that clashes with an existing one.  Maybe the
second registration invariant should be checked for each run of the compiler, rather than
by hc-pkg?</para></note>
</sect2>

<sect2><title>The interface to <function>X-pkg</function></title>

<para>Registering a package with a compiler records the package information in some
implementation-specific way; how it does so is not constrained by the HPS.
Much of an IPD is independent of the compiler, but it may also include compiler-specific
fields.</para>

<para>Each Haskell implementation <function>hc</function> must provide an associated program <function>hc-pkg</function> which 
allows a user to make a new package known to the compiler, and to ask what packages it knows. Here is a summary of its interface.

<note><para>Some of these commands (unregister, hide, and describe) make sense for package IDs which offer a range, such as <command>hc-pkg unregister "hmake&lt;2.3"</command>.</para></note>

     <table frame=all><title><function>hc-pkg</function> interface</title>

     <tgroup cols=2 align=left colsep=1 rowsep=1>     <tbody>

     <row><Entry><cmdsynopsis><command>hc-pkg register</command> <group choice=req>
		      <arg><replaceable>filename</replaceable></arg>
		      <arg><option>-</option></arg></group>
		  <arg choice=opt><option>-user</option></arg>
		</cmdsynopsis></entry>
                    <Entry><para>Register the package using the specified installed package description.
		  The syntax for the latter is given in <xref linkend="ipd">.</para></entry></row>


     <row><Entry><cmdsynopsis><command>hc-pkg unregister</command> 
		<arg><replaceable>pkg-id</replaceable></arg>
		  </cmdsynopsis></entry>
                    <Entry><para>Unregister the specified package.</para></entry></row>

     <row><Entry><cmdsynopsis><command>hc-pkg expose</command> 
		<arg><replaceable>pkg-id</replaceable></arg>
		  </cmdsynopsis></entry>
                    <Entry><para>Expose the specified package.</para></entry></row>

     <row><Entry><cmdsynopsis><command>hc-pkg hide</command> 
		<arg><replaceable>pkg-id</replaceable></arg>
		  </cmdsynopsis></entry>
                    <Entry><para>Hide the specified package.</para></entry></row>

     <row><Entry><command>hc-pkg list</command></entry>
                    <Entry><para>List all registered packages, both shared and user, hidden and exposed. </para></entry></row>


     <row><Entry><cmdsynopsis><command>hc-pkg describe</command> <arg choice=req><replaceable>pkg-id</replaceable> </arg>
		  </cmdsynopsis></entry>
         <Entry><para>Give the registered description for the specified package.
		    The description is returned in precisely the syntax required by 
		    <command>hc-pkg register</command>.  
		</para></entry></row>

     <row><Entry><cmdsynopsis><command>hc-pkg field</command> <arg choice=req><replaceable>pkg-id</replaceable> </arg>
		<arg choice=req><replaceable>field</replaceable> </arg>
		  </cmdsynopsis></entry>

         <Entry><para>Extract the specified field of the package description for the specified package.
		</para></entry></row>

     </tbody></tgroup>
     </table>
A <replaceable>pkg-id</replaceable> argument can be a package ID, such as "<function>hunit-2.3</function>", or just a package name, 
such as "<function>hunit</function>".  To determine which package is meant, <function>hc-pkg</function> searches first the
registered user packages and then the shared packages.  If no such package exists, the command fails; that is, it does nothing, 
returning a non-zero error code.
If only a name is specified, <function>hc-pkg</function> fails
unless the name identifies a unique package among the user packages, or among the shared packages.  As usual, the
user packages win.
      </para>
<note><para>Can we give the <option>-user</option> flag to <command>hide</command>, <command>expose</command>,
<command>describe</command>?  Can we register a package that is already registered?  What if it's registered
as a shared package and we register it as a user package?</para>
      </note>
</sect2>

<sect2 id=ipd><title>Syntax of installed package description</title>

<note><para>...include the list of ``externally visible modules''.</para></note>
</sect2>
</sect1> <!-- End of compiler requirements --> 


<!-- Setup script -->

<sect1 id=setup><title>The setup script</title>

<para>
The sole requirement of an HPS package is that it should contain,
in the root of its file structure, (a) a package description file <function>pkg.desc</function>,
and (b) a setup script, <function>Setup.lhs</function>.
This section
specifies the syntax of the package description, and the command-line interface for the setup script.
</para>

<sect2 id=pkg-desc><title>The package description</title>

<para>Here is a sample package description file:
<programlisting>
  name:    hunit
  version: 4.3
  deps:    [ foogle > 2.9, bargle = 2.5.1 ]
</programlisting>
The first two lines are compulsory. The dependencies are optional, assumed empty if omitted. 
There can be more lines, all of the form <emphasis>name: stuff</emphasis>, specific to the particular
setup script in use.  For the HPS-provided simple build infrastructure, the package description syntax
is given in <xref linkend="sbi-pkg-desc">.
</para>
<para>The exact syntax is still undecided.  Should we put everything in quotes?
<programlisting>
  deps:    [ "foogle > 2.9", "bargle = 2.5.1" ]
</programlisting>
If not, how can a program that doesn't understand a particular field safely ignore it?  Skip to end of line?
But there may be many lines in a field like <function>deps</function>.
</para>
    </sect2>

<sect2 id=setup-spec><title>The setup script specification</title>

<para>Here is the command-line interface the setup script must satisfy.
     <table frame=all><title>Setup.lhs interface</title>

     <tgroup cols=2 align=left colsep=1 rowsep=1>     <tbody>

     <row><Entry><command>./Setup.lhs configure [flags] </command></entry>
         <Entry><para> Prepare to build the package.  Typically, this step checks
		that the target platform is capable of building the package, and
		discovers platform-specific features that are needed during the build. 
		</para></entry></row>


     <row><Entry><command>./Setup.lhs build</command></entry>
                    <Entry><para>Make this package ready for installation.  For a true compiler,
		  this step involves compiling the Haskell source code.  Even for an interpreter, however, 
		  it may involve running a pre-processor.</para></entry></row>


     <row><Entry><command>./Setup.lhs install [--install-prefix=path]</command></entry>
                    <Entry><para>Copy the files into the install locations, and register
		  the package with the compiler.</para></entry></row>

     <row><Entry><cmdsynopsis>
		  <command>./Setup.lhs register</command><sbr>
		  <command>./Setup.lhs unregister</command>
	      </cmdsynopsis>
	      </entry>
                    <Entry><para>Register (or un-register) this package with the
                    compiler.  (NB: registration is also done automatically by <function>install</function>.)
		</para></entry></row>

     <row><Entry><command>./Setup.lhs test</command></entry>
                    <Entry><para>Run the package's test suite.</para></entry></row>

     </tbody></tgroup>
     </table>
    </para>

<sect3><title><function>setup configure</function></title>

<para>The command <function>./Setup.lhs configure</function> prepares
to build the package.  For sophisticated packages, the configure step
may perform elaborate checks, to gather information about the target
system.  It may write a file to record its results, but the name and
format of this file are not part of the specification.  For wrapped
make-based systems (for instance), a command-line parser that
understands the standard targets will be provided.

      </para>
<para>
All flags are optional.  The flags are these:
<itemizedlist>
<listitem><para><function>--with-compiler=</function><replaceable>path</replaceable>, 
<function>--ghc</function>, 
<function>--nhc</function>, 
<function>--hugs</function>: 
specifies which compiler to use.  At most one of these flags may be specified.
The configure step checks
that the compiler is available, in a sufficiently up-to-date form for the package, and that the package
expects to work with that compiler.  If the compiler name
is not specified, <function>setup</function> will choose one; some packages will come with one compiler baked in.
</para>
	  </listitem>
<listitem><para><function>--prefix=</function><replaceable>path</replaceable>: specifies where the installed files
for the package should be installed.  Typically on Unix this will be <function>/usr/local</function> and
on Windows it will be <function>Program Files</function>.  The setup script will use a sensible default
(often platform-specific) if the flag is not specified.
	    </para>
	  </listitem>

<listitem><para>Unrecognized flags are errors in the default build system, but may be meaningful to wrapped make-based systems (for instance).  Therefore, the provided command-line parser will pass unrecognized command-line flags on to the wrapped system.</para></listitem>

        </itemizedlist>
It is OK for these flags to be "baked into" the compiled library.  In particular, the build system may
bake the installation path into the compiled files.  There is no provision for installing the compiled
files anywhere other than the place specified in the <function>configure</function> step.
      </para>
    </sect3>

<sect3><title><function>setup build</function></title>

<para>The command <function>./Setup.lhs build</function> makes this
package, ready for installation.  It takes no flags.</para>

    </sect3>

<sect3><title><function>setup install</function></title>

<para>The command <function>./Setup.lhs install</function> copies files from the built package to
the right location for installed files, specified in the configure step.  Then it registers the new package with
the compiler, using the <function>hc-pkg</function> command.
<itemizedlist>
<listitem><para><function>--install-prefix=</function><replaceable>path</replaceable> has three effects. 
First, it over-rides the <function>--prefix</function> flag specified in the <function>configure</function> step,
providing an alternative location.  Second, it does not call <function>hc-pkg</function> to register the package.
Instead, third, it creates an installed package description file, <filename>installed-pkg-descr</filename>, 
which can later be fed to <function>hc-pkg</function>.
	    </para>
	  </listitem>

<listitem><para><function>--shared</function>: if present, this flag is passed to <function>hc-pkg</function> 
so that the package is registered as shared.  This flag has no effect if <function>--install-prefix</function> is
used, because in that case <function>hc-pkg</function> is not called.
	    </para>
	  </listitem>
	</itemizedlist>

</para>
<para>The reason for the <option>--install-prefix</option> flag is that Roland RPM 
wants to create an exact installation tree, all ready
to bundle up for the target machine, <emphasis>but in a temporary location</emphasis>. He cannot use
this location for <function>--prefix</function> in the <function>configure</function> step, because that
might bake the wrong path into some compiled files.  Nor does he want to register this temporary tree with the compiler
on his machine. Instead, he bundles up the temporary installation tree, plus the <filename>installed-pkg-descr</filename>,
and ships them all to the target machine.  When they are installed there, the post-installation script runs
<function>hc-pkg</function> on the <filename>installed-pkg-descr</filename> file.
	    </para>
<para>Note that there is no <command>uninstall</command> command in the setup script.  
Why not?  Because... <emphasis>Someone tell me why not!</emphasis></para>


    </sect3>

<sect3><title><function>setup register</function> and <function>setup unregister</function></title>

<para>The command <function>./Setup.lhs register</function> registers the now-installed package with the compiler.
Similarly, <function>./Setup.lhs unregister</function> un-registers the package.
<itemizedlist>
<listitem><para><function>--shared</function>: registers/un-registers the package as a shared package.  
The default is to treat the package as a user package.</para>
	  </listitem>
	</itemizedlist></para>

    </sect3>
    </sect2>

<sect2><title>Examples</title>
<sect3><title>Bob the Builder and Sam Sysadmin</title>

<para>Bob the Builder can install an HPS source distribution thus.
He downloads the source distribution and unpacks it into a temporary directory,
<function>cd</function>'s to that directory, and says
<programlisting>
  ./Setup.lhs configure --ghc
  ./Setup.lhs build
  ./Setup.lhs install
</programlisting>
Similarly, Sam Sysadmin does exactly the same, except that he says 
<programlisting>
  ./Setup.lhs install --shared
</programlisting>
in the final step, so that the package is installed where all users will see it.
      </para>
<para>For a binary distribution, both Bob and Sam would omit the first two steps, and just do the install step.</para>
</sect3>

<sect3><title>System packagers (Debian, RPM etc)</title>

<para>System packagers, such as Donald Debian, will run the configure and build steps just like Bob and Sam.
At that point, Donald will say
<programlisting>
  ./Setup.lhs install --install-prefix=/tmp/donald
</programlisting>
to construct a ready-to-zip tree of all the installed files, plus a file <filename>installed-pkg-descr</filename>
that describes the installed package.  He arranges to deliver both these components to the target machine,
and then feed <filename>installed-pkg-descr</filename> to <function>hc-pkg</function> on the target machine.
      </para>
<para>
The file <filename>installed-pkg-descr</filename> also contains information he needs for building
his Debian distribution, namely the
package name, version, and (exact) dependencies.
      </para>
      </sect3>
   </sect2>
</sect1>

<sect1 id=sbi><title>The HPS simple build infrastructure</title>

<para>A package author must fulfil the specification of <xref linkend=setup>.
In many cases, a Haskell package will consist of nothing more than a bunch of Haskell modules,
with perhaps the odd C file.  In that case, the HPS provides a <emphasis>simple build infrastructure</emphasis> that
fulfils the specification of <xref linkend=setup>, and provides some modest further facilities besides.</para>
<para>This simple build infrastructure is meant to automate the common case.
(Think <function>hmake</function>.)  The emphasis is on ``simple'': 
if you want something more elaborate, you can (a) modify the simple build infrastructure (which is written in Haskell),
(b) use makefiles, or (c) implement something else entirely.
</para>

<sect2><title>Overview</title>
<para>
The simple build infrastructure works as follows.  First, Angela puts the following Haskell 
file <function>Setup.lhs</function> in the
root of her tree:
<programlisting>
  #! runghc
  > import Distribution.Simple
</programlisting>
Second, she writes a package description <function>pkg.desc</function> in the syntax of <xref linkend="sbi-pkg-desc">,
which describes the package and gives extra information to the simple build infrastructure.
</para><para>
Now Angela can build her package by saying
<programlisting>
  ./Setup.lhs configure
  ./Setup.lhs build
</programlisting>
She can even install it on her own machine by saying
<programlisting>
  ./Setup.lhs install
</programlisting>
She can build an HPS source distribution:
<programlisting>
  ./Setup.lhs source-dist
</programlisting>
The full details are given in <xref linkend="sbi-setup">.  
</para><para>
It is no coincidence that the interface is very similar to that for the setup script
for an HPS package distribution (<xref linkend="setup">).
In fact, <function>Distribution.Simple.defaultMain</function> conforms to the specification of <xref linkend="setup-spec">, and when it builds
a distribution, it includes <function>./Setup.lhs</function> in the tarball, ready to be run by Bob the Builder.
However, <function>Distribution.Simple.defaultMain</function> of course implements a richer interface than that required by
<xref linkend="setup-spec">, because it's intended to support Angela as well as Bob.
The full specification is in <xref linkend="sbi-setup">.
</para>
    </sect2>
<sect2 id=sbi-pkg-desc><title>Package description in the simple build infrastructure</title>

<para>When using the simple build infrastructure, the package description file <function>pkg.desc</function>
contains not only the name of the package, its version and dependencies, but also a collection of information 
to explain to the simple build infrastructure how to build the package.  This section gives the syntax.</para>

<para>For now, we just have a checklist of what must be there:
<itemizedlist>
<listitem><para><function>exposed:</function> Exposed modules</para>
	    </listitem>
<listitem><para><function>hidden:</function> Hidden (or internal) modules</para>
	    </listitem>
<listitem><para><function>ghc-flags:</function>, <function>hugs-flags:</function>, <function>nhc-flags:</function>  
		Extra compiler flags for GHC, Hugs, nhc.</para>
	    </listitem>
<listitem><para> <emphasis>What else?</emphasis></para>
	  </listitem></itemizedlist></para>

<para>Why must we list hidden modules? 
These modules form part of the implementation of the package, but not its interface: a
client of the package cannot import an internal module.  They must nevertheless be listed
explicitly for two reasons: (a) to allow the global program invariant to be 
checked (see <xref linkend="packages-and-haskell">)
and (b) to enable a build system or programming environment to find the source files.
	</para>
</sect2>

<sect2 id="sbi-setup"><title><function>Distribution.Simple</function></title>

<para>This section gives the command line interface supported by <function>Distribution.Simple.defaultMain</function>.
It supports all the commands described in <xref linkend=setup-spec>, and in addition the following:
     <table frame=all><title>Extra commands supported by the simple build infrastructure setup script</title>

     <tgroup cols=2 align=left colsep=1 rowsep=1>     <tbody>

     <row><Entry><command>./Setup.lhs woggle </command></entry>
         <Entry><para> Blurgh </para></entry></row>

     </tbody></tgroup>
     </table>
    </para>
</sect2>

<sect2><title>The Makefile route</title>

<para>The Haskell libraries that support the simple build infrastructure can, of course, also
be re-used to make setup scripts that work quite differently.  At one extreme is a setup
script that immediately shells out into <function>make</function>, which does all the work.</para>

<para>To support this, HPS provides a trivial setup library <function>Distribution.Make</function>, which
simply parses the command line arguments and shells out into <function>make</function>.  Marcus uses the following
<function>Setup.lhs</function>
<programlisting>
  module Main where
  import Distribution.Make
  main = setup
</programlisting>
All the package description information is assumed to be known to the makefile system, and so does not
appear in the setup script.
Thus, 
<programlisting>
  setup configure --ghc
</programlisting>
invokes
<programlisting>
  ./configure --with-hc=ghc
</programlisting>
Similarly
<function>setup build</function>
invokes
<function>make all</function>.  And so on.</para>

<para>Marcus simply arranges that when his makefiles build a distribution, they include this simple setup script in
the root of the distribution, where Bob the Builder expects to find it.</para>

<note><para> Isaac isn't sure that we can provide much value here
beside providing a standard command-line parser (which is pretty good
at least).  I think it might be good to offer a boilerplate Setup.lhs
file for makefiles, but implementing it in a library is a bit
overkill.  Perhaps in the future, if the build system is delivered w/
fptools or something, we could provide an API to wrap
that. </para> <para>Simon says: fine -- but we should make it easy for Marcus Makefile.  What do we offer?</para></note>


</sect2>

</sect1>



<!-- Appendix: Related Systems -->
<appendix><title>Related Systems</title>

<para>I will try to outline interesting points in a variety of systems
that we can learn from.  These systems may be intended for building or
installing packages, or repositories for packages.  I am not deeply
familiar with all of the tools here, and would be interested in
hearing more relevant points from someone with more knowledge.
Another weakness of mine is that I don't know much about Microsoft
Windows, so some good examples for Windows systems would be
helpful.</para>

<section id="lip-appendix-debian"><Title>Debian</title>

<para>
The <ulink url="http://www.debian.org">Debian GNU/Linux system</ulink>
is a good example of a <emphasis>binary</emphasis> distribution
(meaning that packages are distributed in binary, as opposed to source
code form), and its packaging system (<application>dpkg</application>)
is somewhat similar to the more famous <application>RPM</application>.
Debian has several other tools to help the user to install packages,
most notably, <command>apt</command>.  The Debian toolset is
interesting for several reasons:

<itemizedList>

  <listItem><para>It handles dependencies extremely well.  A single
  command can download and install a package, as well as downloading
  and installing all of its dependencies.</para></listItem>

  <listItem><para>It handles updates extremely well.  One command
  (<command>apt-get update</command>) checks for new versions of
  packages and updates a local database.  Another command
  (<command>apt-get dist-upgrade</command>) downloads and installs all
  new versions of installed packages and any new
  dependencies.</para></listItem>


  <listItem><para>There are standard commands for downloading and
  building packages from source.  If I'm interested in hacking on a
  package, I can run <command>apt-get source packagename</command>
  which will download and unpack the source code for the package.  The
  source can then be built with the standard command
  <command>debuild</command>.</para></listItem>


  <listItem><para>The Debian Project maintains a central repository
  for packages, and the packaging tools offer support for using
  unofficial repositories as well.  The central repositories include a
  set of servers, the <emphasis>autobuilders</emphasis>, which compile
  uploaded source packages for a variety of hardware architectures
For faster browsing, not all history is shown. View entire blame