diff --git a/rts/Linker.c b/rts/Linker.c
index 443de6a356e3090d89eb56c0d5703b8e8cfd50d1..d879c86acbf882dedccbe26bf03cbcc4eb1f0c00 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -188,7 +188,7 @@ int ocTryLoad( ObjectCode* oc );
  *
  * MAP_32BIT not available on OpenBSD/amd64
  */
-#if defined(MAP_32BIT) && defined(x86_64_HOST_ARCH)
+#if defined(MAP_32BIT) && (defined(x86_64_HOST_ARCH) || (defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH)))
 #define MAP_LOW_MEM
 #define TRY_MAP_32BIT MAP_32BIT
 #else
@@ -214,10 +214,22 @@ int ocTryLoad( ObjectCode* oc );
  * systems, we have to pick a base address in the low 2Gb of the address space
  * and try to allocate memory from there.
  *
+ * The same holds for aarch64, where the default, even with PIC, model
+ * is 4GB. The linker is free to emit AARCH64_ADR_PREL_PG_HI21
+ * relocations.
+ *
  * We pick a default address based on the OS, but also make this
  * configurable via an RTS flag (+RTS -xm)
  */
-#if defined(MAP_32BIT) || DEFAULT_LINKER_ALWAYS_PIC
+
+#if (defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH))
+// Try to use stg_upd_frame_info as the base. We need to be within +-4GB of that
+// address, otherwise we violate the aarch64 memory model. Any object we load
+// can potentially reference any of the ones we bake into the binary (and list)
+// in RtsSymbols. Thus we'll need to be within +-4GB of those,
+// stg_upd_frame_info is a good candidate as it's referenced often.
+#define MMAP_32BIT_BASE_DEFAULT (void*)&stg_upd_frame_info;
+#elif defined(MAP_32BIT) || DEFAULT_LINKER_ALWAYS_PIC
 // Try to use MAP_32BIT
 #define MMAP_32BIT_BASE_DEFAULT 0
 #else
@@ -1040,11 +1052,47 @@ resolveSymbolAddr (pathchar* buffer, int size,
 }
 
 #if RTS_LINKER_USE_MMAP
+
+/* -----------------------------------------------------------------------------
+   Occationally we depend on mmap'd region being close to already mmap'd regions.
+
+   Our static in-memory linker may be restricted by the architectures relocation
+   range. E.g. aarch64 has a +-4GB range for PIC code, thus we'd preferrably
+   get memory for the linker close to existing mappings.  mmap on it's own is
+   free to return any memory location, independent of what the preferred
+   location argument indicates.
+
+   For example mmap (via qemu) might give you addresses all over the available
+   memory range if the requested location is already occupied.
+
+   mmap_next will do a linear search from the start page upwards to find a
+   suitable location that is as close as possible to the locations (proivded
+   via the first argument).
+   -------------------------------------------------------------------------- */
+
+void*
+mmap_next(void *addr, size_t length, int prot, int flags, int fd, off_t offset) {
+  if(addr == NULL) return mmap(addr, length, prot, flags, fd, offset);
+  // we are going to look for up to pageSize * 1024 * 1024 (4GB) from the
+  // address.
+  size_t pageSize = getPageSize();
+  for(int i = (uintptr_t)addr & (pageSize-1) ? 1 : 0; i < 1024*1024; i++) {
+    void *target = (void*)(((uintptr_t)addr & ~(pageSize-1))+(i*pageSize));
+    void *mem = mmap(target, length, prot, flags, fd, offset);
+    if(mem == NULL) return mem;
+    if(mem == target) return mem;
+    munmap(mem, length);
+    IF_DEBUG(linker && (i % 1024 == 0),
+      debugBelch("mmap_next failed to find suitable space in %p - %p\n", addr, target));
+  }
+  return NULL;
+}
+
 //
 // Returns NULL on failure.
 //
 void *
-mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
+mmapForLinker (size_t bytes, uint32_t prot, uint32_t flags, int fd, int offset)
 {
    void *map_addr = NULL;
    void *result;
@@ -1065,15 +1113,14 @@ mmap_again:
        map_addr = mmap_32bit_base;
    }
 
-   const int prot = PROT_READ | PROT_WRITE;
    IF_DEBUG(linker,
             debugBelch("mmapForLinker: \tprotection %#0x\n", prot));
    IF_DEBUG(linker,
             debugBelch("mmapForLinker: \tflags      %#0x\n",
                        MAP_PRIVATE | tryMap32Bit | fixed | flags));
 
-   result = mmap(map_addr, size, prot,
-                 MAP_PRIVATE|tryMap32Bit|fixed|flags, fd, offset);
+   result = mmap_next(map_addr, size, prot,
+                      MAP_PRIVATE|tryMap32Bit|fixed|flags, fd, offset);
 
    if (result == MAP_FAILED) {
        sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
@@ -1126,6 +1173,28 @@ mmap_again:
            goto mmap_again;
        }
    }
+#elif (defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH))
+    // for aarch64 we need to make sure we stay within 4GB of the
+    // mmap_32bit_base, and we also do not want to update it.
+//    if (mmap_32bit_base != (void*)&stg_upd_frame_info) {
+    if (result == map_addr) {
+        mmap_32bit_base = (void*)((uintptr_t)map_addr + size);
+    } else {
+        // upper limit 4GB - size of the object file - 1mb wiggle room.
+        if(llabs((uintptr_t)result - (uintptr_t)&stg_upd_frame_info) > (2<<32) - size - (2<<20)) {
+            // not within range :(
+            debugTrace(DEBUG_linker,
+                        "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
+                        bytes, result);
+            munmap(result, size);
+            // TODO: some abort/mmap_32bit_base recomputation based on
+            //       if mmap_32bit_base is changed, or still at stg_upd_frame_info
+            goto mmap_again;
+        } else {
+            mmap_32bit_base = (void*)((uintptr_t)result + size);
+        }
+    }
+//   }
 #endif
 
    IF_DEBUG(linker,
@@ -1454,9 +1523,9 @@ preloadObjectFile (pathchar *path)
     * See also the misalignment logic for darwin below.
     */
 #if defined(ios_HOST_OS)
-   image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+   image = mmapForLinker(fileSize, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
 #else
-   image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
+   image = mmapForLinker(fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
                 MAP_PRIVATE, fd, 0);
 #endif
 
diff --git a/rts/LinkerInternals.h b/rts/LinkerInternals.h
index cf0cd35167444a7ddb02abf56f6831ea3c2fd8a1..1f63f0c485185f4bfc70524932febd76a66e601c 100644
--- a/rts/LinkerInternals.h
+++ b/rts/LinkerInternals.h
@@ -14,6 +14,7 @@
 
 #if RTS_LINKER_USE_MMAP
 #include <sys/mman.h>
+void* mmap_next(void *addr, size_t length, int prot, int flags, int fd, off_t offset);
 #endif
 
 void printLoadedObjects(void);
@@ -293,7 +294,7 @@ void exitLinker( void );
 void freeObjectCode (ObjectCode *oc);
 SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo);
 
-void *mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset);
+void *mmapForLinker (size_t bytes, uint32_t prot, uint32_t flags, int fd, int offset);
 void mmapForLinkerMarkExecutable (void *start, size_t len);
 
 void addProddableBlock ( ObjectCode* oc, void* start, int size );
diff --git a/rts/linker/Elf.c b/rts/linker/Elf.c
index 3c0ace3a4b93ad916796c65e43bd9587dfea15a8..722643d32624773ab9d4ba405ccf0666d1e8ac34 100644
--- a/rts/linker/Elf.c
+++ b/rts/linker/Elf.c
@@ -637,7 +637,7 @@ mapObjectFileSection (int fd, Elf_Word offset, Elf_Word size,
 
     pageOffset = roundDownToPage(offset);
     pageSize = roundUpToPage(offset-pageOffset+size);
-    p = mmapForLinker(pageSize, 0, fd, pageOffset);
+    p = mmapForLinker(pageSize, PROT_READ | PROT_WRITE, 0, fd, pageOffset);
     if (p == NULL) return NULL;
     *mapped_size = pageSize;
     *mapped_offset = pageOffset;
@@ -709,7 +709,7 @@ ocGetNames_ELF ( ObjectCode* oc )
                * address might be out of range for sections that are mmaped.
                */
               alloc = SECTION_MMAP;
-              start = mmapForLinker(size, MAP_ANONYMOUS, -1, 0);
+              start = mmapForLinker(size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS, -1, 0);
               mapped_start = start;
               mapped_offset = 0;
               mapped_size = roundUpToPage(size);
@@ -751,8 +751,9 @@ ocGetNames_ELF ( ObjectCode* oc )
           unsigned nstubs = numberOfStubsForSection(oc, i);
           unsigned stub_space = STUB_SIZE * nstubs;
 
-          void * mem = mmapForLinker(size+stub_space, MAP_ANON, -1, 0);
-          if( mem == NULL ) {
+          void * mem = mmapForLinker(size+stub_space, PROT_READ | PROT_WRITE, MAP_ANON, -1, 0);
+
+          if( mem == MAP_FAILED ) {
               barf("failed to mmap allocated memory to load section %d. "
                    "errno = %d", i, errno);
           }
@@ -841,6 +842,26 @@ ocGetNames_ELF ( ObjectCode* oc )
 
       unsigned curSymbol = 0;
 
+      unsigned long common_size = 0;
+      unsigned long common_used = 0;
+      for(ElfSymbolTable *symTab = oc->info->symbolTables;
+           symTab != NULL; symTab = symTab->next) {
+           for (size_t j = 0; j < symTab->n_symbols; j++) {
+               ElfSymbol *symbol = &symTab->symbols[j];
+               if (SHN_COMMON == symTab->symbols[j].elf_sym->st_shndx) {
+                   common_size += symbol->elf_sym->st_size;
+               }
+           }
+      }
+      void * common_mem = NULL;
+      if(common_size > 0) {
+          common_mem = mmapForLinker(common_size,
+                            PROT_READ | PROT_WRITE,
+                            MAP_ANON | MAP_PRIVATE,
+                            -1, 0);
+          ASSERT(common_mem != NULL);
+      }
+
       //TODO: we ignore local symbols anyway right? So we can use the
       //      shdr[i].sh_info to get the index of the first non-local symbol
       // ie we should use j = shdr[i].sh_info
@@ -876,12 +897,15 @@ ocGetNames_ELF ( ObjectCode* oc )
 
                if (shndx == SHN_COMMON) {
                    isLocal = false;
-                   symbol->addr = stgCallocBytes(1, symbol->elf_sym->st_size,
-                                       "ocGetNames_ELF(COMMON)");
-                   /*
-                   debugBelch("COMMON symbol, size %d name %s\n",
-                                   stab[j].st_size, nm);
-                   */
+                   ASSERT(common_used < common_size);
+                   ASSERT(common_mem);
+                   symbol->addr = (void*)((uintptr_t)common_mem + common_used);
+                   common_used += symbol->elf_sym->st_size;
+                   ASSERT(common_used <= common_size);
+
+                   debugBelch("COMMON symbol, size %ld name %s allocated at %p\n",
+                                   symbol->elf_sym->st_size, nm, symbol->addr);
+
                    /* Pointless to do addProddableBlock() for this area,
                       since the linker should never poke around in it. */
                } else if ((ELF_ST_BIND(symbol->elf_sym->st_info) == STB_GLOBAL
diff --git a/rts/linker/LoadArchive.c b/rts/linker/LoadArchive.c
index f77ff43ce38dbf004d22de76c10d96fdd5506187..89350956de6418c840ce1bc57c5641ed5560ac96 100644
--- a/rts/linker/LoadArchive.c
+++ b/rts/linker/LoadArchive.c
@@ -489,7 +489,7 @@ static HsInt loadArchive_ (pathchar *path)
 
 #if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
             if (RTS_LINKER_USE_MMAP)
-                image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
+                image = mmapForLinker(memberSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS, -1, 0);
             else {
                 /* See loadObj() */
                 misalignment = machoGetMisalignment(f);
@@ -548,7 +548,7 @@ while reading filename from `%" PATH_FMT "'", path);
             }
             DEBUG_LOG("Found GNU-variant file index\n");
 #if RTS_LINKER_USE_MMAP
-            gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
+            gnuFileIndex = mmapForLinker(memberSize + 1, PROT_READ | PROT_WRITE, MAP_ANONYMOUS, -1, 0);
 #else
             gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
 #endif
diff --git a/rts/linker/M32Alloc.c b/rts/linker/M32Alloc.c
index 1a19df8471d8bf55422005e6682cbe5b80802e2e..ca5af8312c5c65a4aa6914541baa8ab05122fed9 100644
--- a/rts/linker/M32Alloc.c
+++ b/rts/linker/M32Alloc.c
@@ -256,7 +256,7 @@ m32_alloc_page(void)
     m32_free_page_pool_size --;
     return page;
   } else {
-    struct m32_page_t *page = mmapForLinker(getPageSize(),MAP_ANONYMOUS,-1,0);
+    struct m32_page_t *page = mmapForLinker(getPageSize(), PROT_READ | PROT_WRITE, MAP_ANONYMOUS, -1, 0);
     if (page > (struct m32_page_t *) 0xffffffff) {
       barf("m32_alloc_page: failed to get allocation in lower 32-bits");
     }
@@ -280,7 +280,7 @@ m32_allocator_new(bool executable)
   // Preallocate the initial M32_MAX_PAGES to ensure that they don't
   // fragment the memory.
   size_t pgsz = getPageSize();
-  char* bigchunk = mmapForLinker(pgsz * M32_MAX_PAGES,MAP_ANONYMOUS,-1,0);
+  char* bigchunk = mmapForLinker(pgsz * M32_MAX_PAGES, PROT_READ | PROT_WRITE, MAP_ANONYMOUS,-1,0);
   if (bigchunk == NULL)
       barf("m32_allocator_init: Failed to map");
 
@@ -396,7 +396,7 @@ m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment)
    if (m32_is_large_object(size,alignment)) {
       // large object
       size_t alsize = ROUND_UP(sizeof(struct m32_page_t), alignment);
-      struct m32_page_t *page = mmapForLinker(alsize+size,MAP_ANONYMOUS,-1,0);
+      struct m32_page_t *page = mmapForLinker(alsize+size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS,-1,0);
       page->filled_page.size = alsize + size;
       m32_allocator_push_filled_list(&alloc->unprotected_list, (struct m32_page_t *) page);
       return (char*) page + alsize;
diff --git a/rts/linker/MachO.c b/rts/linker/MachO.c
index 970cf9c6cb49886bb4d792b48a1055c41ef9c19e..09c240f1f0b9aa2257ed15d5178ea8b6e6404d3f 100644
--- a/rts/linker/MachO.c
+++ b/rts/linker/MachO.c
@@ -508,7 +508,7 @@ makeGot(ObjectCode * oc) {
 
     if(got_slots > 0) {
         oc->info->got_size =  got_slots * sizeof(void*);
-        oc->info->got_start = mmap(NULL, oc->info->got_size,
+        oc->info->got_start = mmapForLinker(oc->info->got_size,
                                    PROT_READ | PROT_WRITE,
                                    MAP_ANON | MAP_PRIVATE,
                                    -1, 0);
@@ -1114,7 +1114,7 @@ ocBuildSegments_MachO(ObjectCode *oc)
         return 1;
     }
 
-    mem = mmapForLinker(size_compound, MAP_ANON, -1, 0);
+    mem = mmapForLinker(size_compound, PROT_READ | PROT_WRITE, MAP_ANON, -1, 0);
     if (NULL == mem) return 0;
 
     IF_DEBUG(linker, debugBelch("ocBuildSegments: allocating %d segments\n", n_activeSegments));
diff --git a/rts/linker/SymbolExtras.c b/rts/linker/SymbolExtras.c
index b7b558cb41212d918f8193a18ae85e6f85851c15..b5a06c662c5246a42c0ad7c35c29e7f0f4c51116 100644
--- a/rts/linker/SymbolExtras.c
+++ b/rts/linker/SymbolExtras.c
@@ -79,7 +79,7 @@ int ocAllocateExtras(ObjectCode* oc, int count, int first, int bssSize)
       size_t n = roundUpToPage(oc->fileSize);
       bssSize = roundUpToAlign(bssSize, 8);
       size_t allocated_size = n + bssSize + extras_size;
-      void *new = mmapForLinker(allocated_size, MAP_ANONYMOUS, -1, 0);
+      void *new = mmapForLinker(allocated_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS, -1, 0);
       if (new) {
           memcpy(new, oc->image, oc->fileSize);
           if (oc->imageMapped) {
diff --git a/rts/linker/elf_got.c b/rts/linker/elf_got.c
index 991c6a9f910e896ad7b3802809361c20009a6fbd..58d5c93b64cd4a87266b777c2950817a25f0312b 100644
--- a/rts/linker/elf_got.c
+++ b/rts/linker/elf_got.c
@@ -48,7 +48,7 @@ makeGot(ObjectCode * oc) {
     }
     if(got_slots > 0) {
         oc->info->got_size = got_slots * sizeof(void *);
-         void * mem = mmap(NULL, oc->info->got_size,
+         void * mem = mmapForLinker(oc->info->got_size,
                            PROT_READ | PROT_WRITE,
                            MAP_ANON | MAP_PRIVATE,
                            -1, 0);