Commit 383733b9 authored by Peter Trommler's avatar Peter Trommler 🥁 Committed by Austin Seipp

Fix obscure problem with using the system linker (#8935)

Summary:
In a statically linked GHCi symbol `environ` resolves to NULL when
called from a Haskell script.

When resolving symbols in a Haskell script we need to search the
executable program and its dependent (DT_NEEDED) shared libraries
first and then search the loaded libraries.

We want to be able to override functions in loaded libraries later.
Libraries must be opened with local scope (RTLD_LOCAL) and not global.
The latter adds all symbols to the executable program's symbols where
they are then searched in loading order. We want reverse loading order.

When libraries are loaded with local scope the dynamic linker
cannot use symbols in that library when resolving the dependencies
in another shared library. This changes the way files compiled to
object code must be linked into temporary shared libraries. We link
with the last temporary shared library created so far if it exists.
Since each temporary shared library is linked to the previous temporary
shared library the dynamic linker finds the latest definition of a
symbol by following the dependency chain.

See also Note [RTLD_LOCAL] for a summary of the problem and solution.

Cherry-picked commit 2f8b4c

Changed linker argument ordering

On some ELF systems GNU ld (and others?) default to
--as-needed and the order of libraries in the link
matters.

The last temporary shared library, must appear
before all other libraries. Switching the position
of extra_ld_inputs and lib_path_objs does that.

Fixes #8935 and #9186

Reviewers: austin, hvr, rwbarton, simonmar

Reviewed By: simonmar

Subscribers: thomie, carter, simonmar

Differential Revision: https://phabricator.haskell.org/D349

GHC Trac Issues: #8935, #9186, #9480
parent 643635ea
...@@ -117,8 +117,12 @@ data PersistentLinkerState ...@@ -117,8 +117,12 @@ data PersistentLinkerState
-- The currently-loaded packages; always object code -- The currently-loaded packages; always object code
-- Held, as usual, in dependency order; though I am not sure if -- Held, as usual, in dependency order; though I am not sure if
-- that is really important -- that is really important
pkgs_loaded :: ![PackageKey] pkgs_loaded :: ![PackageKey],
}
-- we need to remember the name of the last temporary DLL/.so
-- so we can link it
last_temp_so :: !(Maybe FilePath) }
emptyPLS :: DynFlags -> PersistentLinkerState emptyPLS :: DynFlags -> PersistentLinkerState
emptyPLS _ = PersistentLinkerState { emptyPLS _ = PersistentLinkerState {
...@@ -126,7 +130,8 @@ emptyPLS _ = PersistentLinkerState { ...@@ -126,7 +130,8 @@ emptyPLS _ = PersistentLinkerState {
itbl_env = emptyNameEnv, itbl_env = emptyNameEnv,
pkgs_loaded = init_pkgs, pkgs_loaded = init_pkgs,
bcos_loaded = [], bcos_loaded = [],
objs_loaded = [] } objs_loaded = [],
last_temp_so = Nothing }
-- Packages that don't need loading, because the compiler -- Packages that don't need loading, because the compiler
-- shares them with the interpreted program. -- shares them with the interpreted program.
...@@ -316,14 +321,15 @@ linkCmdLineLibs' dflags@(DynFlags { ldInputs = cmdline_ld_inputs ...@@ -316,14 +321,15 @@ linkCmdLineLibs' dflags@(DynFlags { ldInputs = cmdline_ld_inputs
; if null cmdline_lib_specs then return pls ; if null cmdline_lib_specs then return pls
else do else do
{ mapM_ (preloadLib dflags lib_paths framework_paths) cmdline_lib_specs { pls1 <- foldM (preloadLib dflags lib_paths framework_paths) pls
cmdline_lib_specs
; maybePutStr dflags "final link ... " ; maybePutStr dflags "final link ... "
; ok <- resolveObjs ; ok <- resolveObjs
; if succeeded ok then maybePutStrLn dflags "done" ; if succeeded ok then maybePutStrLn dflags "done"
else throwGhcExceptionIO (ProgramError "linking extra libraries/objects failed") else throwGhcExceptionIO (ProgramError "linking extra libraries/objects failed")
; return pls ; return pls1
}} }}
...@@ -362,19 +368,22 @@ classifyLdInput dflags f ...@@ -362,19 +368,22 @@ classifyLdInput dflags f
return Nothing return Nothing
where platform = targetPlatform dflags where platform = targetPlatform dflags
preloadLib :: DynFlags -> [String] -> [String] -> LibrarySpec -> IO () preloadLib :: DynFlags -> [String] -> [String] -> PersistentLinkerState
preloadLib dflags lib_paths framework_paths lib_spec -> LibrarySpec -> IO (PersistentLinkerState)
preloadLib dflags lib_paths framework_paths pls lib_spec
= do maybePutStr dflags ("Loading object " ++ showLS lib_spec ++ " ... ") = do maybePutStr dflags ("Loading object " ++ showLS lib_spec ++ " ... ")
case lib_spec of case lib_spec of
Object static_ish Object static_ish
-> do b <- preload_static lib_paths static_ish -> do (b, pls1) <- preload_static lib_paths static_ish
maybePutStrLn dflags (if b then "done" maybePutStrLn dflags (if b then "done"
else "not found") else "not found")
return pls1
Archive static_ish Archive static_ish
-> do b <- preload_static_archive lib_paths static_ish -> do b <- preload_static_archive lib_paths static_ish
maybePutStrLn dflags (if b then "done" maybePutStrLn dflags (if b then "done"
else "not found") else "not found")
return pls
DLL dll_unadorned DLL dll_unadorned
-> do maybe_errstr <- loadDLL (mkSOName platform dll_unadorned) -> do maybe_errstr <- loadDLL (mkSOName platform dll_unadorned)
...@@ -390,12 +399,14 @@ preloadLib dflags lib_paths framework_paths lib_spec ...@@ -390,12 +399,14 @@ preloadLib dflags lib_paths framework_paths lib_spec
case err2 of case err2 of
Nothing -> maybePutStrLn dflags "done" Nothing -> maybePutStrLn dflags "done"
Just _ -> preloadFailed mm lib_paths lib_spec Just _ -> preloadFailed mm lib_paths lib_spec
return pls
DLLPath dll_path DLLPath dll_path
-> do maybe_errstr <- loadDLL dll_path -> do maybe_errstr <- loadDLL dll_path
case maybe_errstr of case maybe_errstr of
Nothing -> maybePutStrLn dflags "done" Nothing -> maybePutStrLn dflags "done"
Just mm -> preloadFailed mm lib_paths lib_spec Just mm -> preloadFailed mm lib_paths lib_spec
return pls
Framework framework -> Framework framework ->
if platformUsesFrameworks (targetPlatform dflags) if platformUsesFrameworks (targetPlatform dflags)
...@@ -403,6 +414,7 @@ preloadLib dflags lib_paths framework_paths lib_spec ...@@ -403,6 +414,7 @@ preloadLib dflags lib_paths framework_paths lib_spec
case maybe_errstr of case maybe_errstr of
Nothing -> maybePutStrLn dflags "done" Nothing -> maybePutStrLn dflags "done"
Just mm -> preloadFailed mm framework_paths lib_spec Just mm -> preloadFailed mm framework_paths lib_spec
return pls
else panic "preloadLib Framework" else panic "preloadLib Framework"
where where
...@@ -422,11 +434,13 @@ preloadLib dflags lib_paths framework_paths lib_spec ...@@ -422,11 +434,13 @@ preloadLib dflags lib_paths framework_paths lib_spec
-- Not interested in the paths in the static case. -- Not interested in the paths in the static case.
preload_static _paths name preload_static _paths name
= do b <- doesFileExist name = do b <- doesFileExist name
if not b then return False if not b then return (False, pls)
else do if dynamicGhc else if dynamicGhc
then dynLoadObjs dflags [name] then do pls1 <- dynLoadObjs dflags pls [name]
else loadObj name return (True, pls1)
return True else do loadObj name
return (True, pls)
preload_static_archive _paths name preload_static_archive _paths name
= do b <- doesFileExist name = do b <- doesFileExist name
if not b then return False if not b then return False
...@@ -784,8 +798,8 @@ dynLinkObjs dflags pls objs = do ...@@ -784,8 +798,8 @@ dynLinkObjs dflags pls objs = do
wanted_objs = map nameOfObject unlinkeds wanted_objs = map nameOfObject unlinkeds
if dynamicGhc if dynamicGhc
then do dynLoadObjs dflags wanted_objs then do pls2 <- dynLoadObjs dflags pls1 wanted_objs
return (pls1, Succeeded) return (pls2, Succeeded)
else do mapM_ loadObj wanted_objs else do mapM_ loadObj wanted_objs
-- Link them all together -- Link them all together
...@@ -799,9 +813,11 @@ dynLinkObjs dflags pls objs = do ...@@ -799,9 +813,11 @@ dynLinkObjs dflags pls objs = do
pls2 <- unload_wkr dflags [] pls1 pls2 <- unload_wkr dflags [] pls1
return (pls2, Failed) return (pls2, Failed)
dynLoadObjs :: DynFlags -> [FilePath] -> IO ()
dynLoadObjs _ [] = return () dynLoadObjs :: DynFlags -> PersistentLinkerState -> [FilePath]
dynLoadObjs dflags objs = do -> IO PersistentLinkerState
dynLoadObjs _ pls [] = return pls
dynLoadObjs dflags pls objs = do
let platform = targetPlatform dflags let platform = targetPlatform dflags
soFile <- newTempName dflags (soExt platform) soFile <- newTempName dflags (soExt platform)
let -- When running TH for a non-dynamic way, we still need to make let -- When running TH for a non-dynamic way, we still need to make
...@@ -809,10 +825,22 @@ dynLoadObjs dflags objs = do ...@@ -809,10 +825,22 @@ dynLoadObjs dflags objs = do
-- Opt_Static off -- Opt_Static off
dflags1 = gopt_unset dflags Opt_Static dflags1 = gopt_unset dflags Opt_Static
dflags2 = dflags1 { dflags2 = dflags1 {
-- We don't want to link the ldInputs in; we'll -- We don't want the original ldInputs in
-- be calling dynLoadObjs with any objects that -- (they're already linked in), but we do want
-- need to be linked. -- to link against the previous dynLoadObjs
ldInputs = [], -- library if there was one, so that the linker
-- can resolve dependencies when it loads this
-- library.
ldInputs =
case last_temp_so pls of
Nothing -> []
Just so ->
let (lp, l) = splitFileName so in
[ Option ("-L" ++ lp)
, Option ("-Wl,-rpath")
, Option ("-Wl," ++ lp)
, Option ("-l:" ++ l)
],
-- Even if we're e.g. profiling, we still want -- Even if we're e.g. profiling, we still want
-- the vanilla dynamic libraries, so we set the -- the vanilla dynamic libraries, so we set the
-- ways / build tag to be just WayDyn. -- ways / build tag to be just WayDyn.
...@@ -824,7 +852,7 @@ dynLoadObjs dflags objs = do ...@@ -824,7 +852,7 @@ dynLoadObjs dflags objs = do
consIORef (filesToNotIntermediateClean dflags) soFile consIORef (filesToNotIntermediateClean dflags) soFile
m <- loadDLL soFile m <- loadDLL soFile
case m of case m of
Nothing -> return () Nothing -> return pls { last_temp_so = Just soFile }
Just err -> panic ("Loading temp shared object failed: " ++ err) Just err -> panic ("Loading temp shared object failed: " ++ err)
rmDupLinkables :: [Linkable] -- Already loaded rmDupLinkables :: [Linkable] -- Already loaded
......
...@@ -1468,6 +1468,7 @@ linkDynLib dflags0 o_files dep_packages ...@@ -1468,6 +1468,7 @@ linkDynLib dflags0 o_files dep_packages
in package_hs_libs ++ extra_libs ++ other_flags in package_hs_libs ++ extra_libs ++ other_flags
-- probably _stub.o files -- probably _stub.o files
-- and last temporary shared object file
let extra_ld_inputs = ldInputs dflags let extra_ld_inputs = ldInputs dflags
case os of case os of
...@@ -1585,8 +1586,8 @@ linkDynLib dflags0 o_files dep_packages ...@@ -1585,8 +1586,8 @@ linkDynLib dflags0 o_files dep_packages
-- Set the library soname. We use -h rather than -soname as -- Set the library soname. We use -h rather than -soname as
-- Solaris 10 doesn't support the latter: -- Solaris 10 doesn't support the latter:
++ [ Option ("-Wl,-h," ++ takeFileName output_fn) ] ++ [ Option ("-Wl,-h," ++ takeFileName output_fn) ]
++ map Option lib_path_opts
++ extra_ld_inputs ++ extra_ld_inputs
++ map Option lib_path_opts
++ map Option pkg_lib_path_opts ++ map Option pkg_lib_path_opts
++ map Option pkg_link_opts ++ map Option pkg_link_opts
) )
......
...@@ -1839,7 +1839,7 @@ internal_dlopen(const char *dll_name) ...@@ -1839,7 +1839,7 @@ internal_dlopen(const char *dll_name)
// (see POSIX also) // (see POSIX also)
ACQUIRE_LOCK(&dl_mutex); ACQUIRE_LOCK(&dl_mutex);
hdl = dlopen(dll_name, RTLD_LAZY | RTLD_GLOBAL); hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
errmsg = NULL; errmsg = NULL;
if (hdl == NULL) { if (hdl == NULL) {
...@@ -1849,11 +1849,12 @@ internal_dlopen(const char *dll_name) ...@@ -1849,11 +1849,12 @@ internal_dlopen(const char *dll_name)
errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL"); errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
strcpy(errmsg_copy, errmsg); strcpy(errmsg_copy, errmsg);
errmsg = errmsg_copy; errmsg = errmsg_copy;
} else {
o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
o_so->handle = hdl;
o_so->next = openedSOs;
openedSOs = o_so;
} }
o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
o_so->handle = hdl;
o_so->next = openedSOs;
openedSOs = o_so;
RELEASE_LOCK(&dl_mutex); RELEASE_LOCK(&dl_mutex);
//--------------- End critical section ------------------- //--------------- End critical section -------------------
...@@ -1861,14 +1862,39 @@ internal_dlopen(const char *dll_name) ...@@ -1861,14 +1862,39 @@ internal_dlopen(const char *dll_name)
return errmsg; return errmsg;
} }
/*
Note [RTLD_LOCAL]
In GHCi we want to be able to override previous .so's with newly
loaded .so's when we recompile something. This further implies that
when we look up a symbol in internal_dlsym() we have to iterate
through the loaded libraries (in order from most recently loaded to
oldest) looking up the symbol in each one until we find it.
However, this can cause problems for some symbols that are copied
by the linker into the executable image at runtime - see #8935 for a
lengthy discussion. To solve that problem we need to look up
symbols in the main executable *first*, before attempting to look
them up in the loaded .so's. But in order to make that work, we
have to always call dlopen with RTLD_LOCAL, so that the loaded
libraries don't populate the global symbol table.
*/
static void * static void *
internal_dlsym(void *hdl, const char *symbol) { internal_dlsym(const char *symbol) {
OpenedSO* o_so; OpenedSO* o_so;
void *v; void *v;
// We acquire dl_mutex as concurrent dl* calls may alter dlerror // We acquire dl_mutex as concurrent dl* calls may alter dlerror
ACQUIRE_LOCK(&dl_mutex); ACQUIRE_LOCK(&dl_mutex);
dlerror(); dlerror();
// look in program first
v = dlsym(dl_prog_handle, symbol);
if (dlerror() == NULL) {
RELEASE_LOCK(&dl_mutex);
return v;
}
for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) { for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
v = dlsym(o_so->handle, symbol); v = dlsym(o_so->handle, symbol);
if (dlerror() == NULL) { if (dlerror() == NULL) {
...@@ -1876,7 +1902,6 @@ internal_dlsym(void *hdl, const char *symbol) { ...@@ -1876,7 +1902,6 @@ internal_dlsym(void *hdl, const char *symbol) {
return v; return v;
} }
} }
v = dlsym(hdl, symbol);
RELEASE_LOCK(&dl_mutex); RELEASE_LOCK(&dl_mutex);
return v; return v;
} }
...@@ -2036,7 +2061,7 @@ static void* lookupSymbol_ (char *lbl) ...@@ -2036,7 +2061,7 @@ static void* lookupSymbol_ (char *lbl)
if (!ghciLookupSymbolTable(symhash, lbl, &val)) { if (!ghciLookupSymbolTable(symhash, lbl, &val)) {
IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n")); IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
# if defined(OBJFORMAT_ELF) # if defined(OBJFORMAT_ELF)
return internal_dlsym(dl_prog_handle, lbl); return internal_dlsym(lbl);
# elif defined(OBJFORMAT_MACHO) # elif defined(OBJFORMAT_MACHO)
# if HAVE_DLFCN_H # if HAVE_DLFCN_H
/* On OS X 10.3 and later, we use dlsym instead of the old legacy /* On OS X 10.3 and later, we use dlsym instead of the old legacy
...@@ -2050,7 +2075,7 @@ static void* lookupSymbol_ (char *lbl) ...@@ -2050,7 +2075,7 @@ static void* lookupSymbol_ (char *lbl)
*/ */
IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl)); IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl));
ASSERT(lbl[0] == '_'); ASSERT(lbl[0] == '_');
return internal_dlsym(dl_prog_handle, lbl + 1); return internal_dlsym(lbl + 1);
# else # else
if (NSIsSymbolNameDefined(lbl)) { if (NSIsSymbolNameDefined(lbl)) {
NSSymbol symbol = NSLookupAndBindSymbol(lbl); NSSymbol symbol = NSLookupAndBindSymbol(lbl);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment