@@ -1251,6 +1251,7 @@
this->package_->bindings()->clear_file_scope();
// Warn about packages which were imported but not used.
+ bool quiet = saw_errors();
for (Packages::iterator p = this->packages_.begin();
p != this->packages_.end();
++p)
@@ -1260,7 +1261,7 @@
&& package->is_imported()
&& !package->used()
&& !package->uses_sink_alias()
- && !saw_errors())
+ && !quiet)
error_at(package->location(), "imported and not used: %s",
Gogo::message_name(package->package_name()).c_str());
package->clear_is_imported();
@@ -32,6 +32,8 @@
RFT_BOOLPTR,
// Go type int, C type int.
RFT_INT,
+ // Go type int32, C type int32_t.
+ RFT_INT32,
// Go type int64, C type int64_t.
RFT_INT64,
// Go type uint64, C type uint64_t.
@@ -102,6 +104,10 @@
t = Type::lookup_integer_type("int");
break;
+ case RFT_INT32:
+ t = Type::lookup_integer_type("int32");
+ break;
+
case RFT_INT64:
t = Type::lookup_integer_type("int64");
break;
@@ -206,6 +212,7 @@
case RFT_BOOL:
case RFT_BOOLPTR:
case RFT_INT:
+ case RFT_INT32:
case RFT_INT64:
case RFT_UINT64:
case RFT_UINTPTR:
@@ -148,27 +148,28 @@
// Start building a select statement.
-DEF_GO_RUNTIME(NEWSELECT, "runtime.newselect", P1(INT), R1(POINTER))
+DEF_GO_RUNTIME(NEWSELECT, "runtime.newselect", P1(INT32), R1(POINTER))
// Add a default clause to a select statement.
-DEF_GO_RUNTIME(SELECTDEFAULT, "runtime.selectdefault", P2(POINTER, INT), R0())
+DEF_GO_RUNTIME(SELECTDEFAULT, "runtime.selectdefault",
+ P2(POINTER, INT32), R0())
// Add a send clause to a select statement.
DEF_GO_RUNTIME(SELECTSEND, "runtime.selectsend",
- P4(POINTER, CHAN, POINTER, INT), R0())
+ P4(POINTER, CHAN, POINTER, INT32), R0())
// Add a receive clause to a select statement, for a clause which does
// not check whether the channel is closed.
DEF_GO_RUNTIME(SELECTRECV, "runtime.selectrecv",
- P4(POINTER, CHAN, POINTER, INT), R0())
+ P4(POINTER, CHAN, POINTER, INT32), R0())
// Add a receive clause to a select statement, for a clause which does
// check whether the channel is closed.
DEF_GO_RUNTIME(SELECTRECV2, "runtime.selectrecv2",
- P5(POINTER, CHAN, POINTER, BOOLPTR, INT), R0())
+ P5(POINTER, CHAN, POINTER, BOOLPTR, INT32), R0())
// Run a select, returning the index of the selected clause.
-DEF_GO_RUNTIME(SELECTGO, "runtime.selectgo", P1(POINTER), R1(INT))
+DEF_GO_RUNTIME(SELECTGO, "runtime.selectgo", P1(POINTER), R1(INT32))
// Panic.
@@ -4841,6 +4841,8 @@
std::vector<std::vector<Bexpression*> > cases(count);
std::vector<Bstatement*> clauses(count);
+ Type* int32_type = Type::lookup_integer_type("int32");
+
int i = 0;
for (Clauses::iterator p = this->clauses_.begin();
p != this->clauses_.end();
@@ -4849,7 +4851,8 @@
int index = p->index();
mpz_t ival;
mpz_init_set_ui(ival, index);
- Expression* index_expr = Expression::make_integer(&ival, NULL, location);
+ Expression* index_expr = Expression::make_integer(&ival, int32_type,
+ location);
mpz_clear(ival);
cases[i].push_back(tree_to_expr(index_expr->get_tree(context)));
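
The explicit int32 type built here lines up with the INT32 parameter kind introduced for the select helpers above: a select clause index is now a fixed 32-bit quantity on every target instead of following the width of Go's int type. The three declarations that have to agree, copied from hunks elsewhere in this patch:

/* DEF_GO_RUNTIME(SELECTSEND, "runtime.selectsend",
                  P4(POINTER, CHAN, POINTER, INT32), R0())
   void runtime_selectsend(Select *, Hchan *, void *, int32)
       __asm__("runtime.selectsend");
   Expression* index_expr = Expression::make_integer(&ival, int32_type,
                                                     location);              */
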
@@ -1,4 +1,4 @@
-2d8bc3c94ecb
+291d9f1baf75
The first line of this file holds the Mercurial revision number of the
last merge done from the master library sources.
@@ -230,6 +230,21 @@
exp/types.gox \
exp/utf8string.gox
+toolexeclibgoexphtmldir = $(toolexeclibgoexpdir)/html
+
+toolexeclibgoexphtml_DATA = \
+ exp/html/atom.gox
+
+toolexeclibgoexplocaledir = $(toolexeclibgoexpdir)/locale
+
+toolexeclibgoexplocale_DATA = \
+ exp/locale/collate.gox
+
+toolexeclibgoexplocalecollatedir = $(toolexeclibgoexplocaledir)/collate
+
+toolexeclibgoexplocalecollate_DATA = \
+ exp/locale/collate/build.gox
+
toolexeclibgogodir = $(toolexeclibgodir)/go
toolexeclibgogo_DATA = \
@@ -483,6 +498,7 @@
runtime/go-unwind.c \
runtime/chan.c \
runtime/cpuprof.c \
+ runtime/lfstack.c \
$(runtime_lock_files) \
runtime/mcache.c \
runtime/mcentral.c \
@@ -492,6 +508,8 @@
runtime/mgc0.c \
runtime/mheap.c \
runtime/msize.c \
+ runtime/panic.c \
+ runtime/parfor.c \
runtime/print.c \
runtime/proc.c \
runtime/runtime.c \
@@ -656,16 +674,16 @@
else # !LIBGO_IS_RTEMS
if LIBGO_IS_LINUX
go_net_fd_os_file = go/net/fd_linux.go
-go_net_newpollserver_file = go/net/newpollserver.go
+go_net_newpollserver_file = go/net/newpollserver_unix.go
else # !LIBGO_IS_LINUX && !LIBGO_IS_RTEMS
if LIBGO_IS_NETBSD
go_net_fd_os_file = go/net/fd_netbsd.go
-go_net_newpollserver_file = go/net/newpollserver.go
+go_net_newpollserver_file = go/net/newpollserver_unix.go
else # !LIBGO_IS_NETBSD && !LIBGO_IS_LINUX && !LIBGO_IS_RTEMS
# By default use select with pipes. Most systems should have
# something better.
go_net_fd_os_file = go/net/fd_select.go
-go_net_newpollserver_file = go/net/newpollserver.go
+go_net_newpollserver_file = go/net/newpollserver_unix.go
endif # !LIBGO_IS_NETBSD
endif # !LIBGO_IS_LINUX
endif # !LIBGO_IS_RTEMS
@@ -674,13 +692,13 @@
go_net_cgo_file = go/net/cgo_linux.go
go_net_sock_file = go/net/sock_linux.go
go_net_sockopt_file = go/net/sockopt_linux.go
-go_net_sockoptip_file = go/net/sockoptip_linux.go
+go_net_sockoptip_file = go/net/sockoptip_linux.go go/net/sockoptip_posix.go
else
if LIBGO_IS_IRIX
go_net_cgo_file = go/net/cgo_linux.go
go_net_sock_file = go/net/sock_linux.go
go_net_sockopt_file = go/net/sockopt_linux.go
-go_net_sockoptip_file = go/net/sockoptip_linux.go
+go_net_sockoptip_file = go/net/sockoptip_linux.go go/net/sockoptip_posix.go
else
if LIBGO_IS_SOLARIS
go_net_cgo_file = go/net/cgo_linux.go
@@ -692,12 +710,19 @@
go_net_cgo_file = go/net/cgo_bsd.go
go_net_sock_file = go/net/sock_bsd.go
go_net_sockopt_file = go/net/sockopt_bsd.go
-go_net_sockoptip_file = go/net/sockoptip_bsd.go go/net/sockoptip_freebsd.go
+go_net_sockoptip_file = go/net/sockoptip_bsd.go go/net/sockoptip_posix.go
+else
+if LIBGO_IS_NETBSD
+go_net_cgo_file = go/net/cgo_netbsd.go
+go_net_sock_file = go/net/sock_bsd.go
+go_net_sockopt_file = go/net/sockopt_bsd.go
+go_net_sockoptip_file = go/net/sockoptip_bsd.go go/net/sockoptip_posix.go
else
go_net_cgo_file = go/net/cgo_bsd.go
go_net_sock_file = go/net/sock_bsd.go
go_net_sockopt_file = go/net/sockopt_bsd.go
-go_net_sockoptip_file = go/net/sockoptip_bsd.go go/net/sockoptip_netbsd.go
+go_net_sockoptip_file = go/net/sockoptip_bsd.go go/net/sockoptip_posix.go
+endif
endif
endif
endif
@@ -706,8 +731,12 @@
if LIBGO_IS_LINUX
go_net_sendfile_file = go/net/sendfile_linux.go
else
+if LIBGO_IS_FREEBSD
+go_net_sendfile_file = go/net/sendfile_freebsd.go
+else
go_net_sendfile_file = go/net/sendfile_stub.go
endif
+endif
if LIBGO_IS_LINUX
go_net_interface_file = go/net/interface_linux.go
@@ -725,13 +754,12 @@
go/net/dial.go \
go/net/dnsclient.go \
go/net/dnsclient_unix.go \
- go/net/dnsconfig.go \
+ go/net/dnsconfig_unix.go \
go/net/dnsmsg.go \
- go/net/doc.go \
$(go_net_newpollserver_file) \
- go/net/fd.go \
+ go/net/fd_unix.go \
$(go_net_fd_os_file) \
- go/net/file.go \
+ go/net/file_unix.go \
go/net/hosts.go \
go/net/interface.go \
$(go_net_interface_file) \
@@ -740,6 +768,7 @@
go/net/iprawsock_posix.go \
go/net/ipsock.go \
go/net/ipsock_posix.go \
+ go/net/lookup.go \
go/net/lookup_unix.go \
go/net/mac.go \
go/net/net.go \
@@ -747,12 +776,12 @@
go/net/parse.go \
go/net/pipe.go \
go/net/port.go \
+ go/net/port_unix.go \
$(go_net_sendfile_file) \
- go/net/sock.go \
+ go/net/sock_posix.go \
$(go_net_sock_file) \
- go/net/sockopt.go \
+ go/net/sockopt_posix.go \
$(go_net_sockopt_file) \
- go/net/sockoptip.go \
$(go_net_sockoptip_file) \
go/net/tcpsock.go \
go/net/tcpsock_posix.go \
@@ -831,6 +860,7 @@
go_reflect_files = \
go/reflect/deepequal.go \
+ go/reflect/makefunc.go \
go/reflect/type.go \
go/reflect/value.go
@@ -882,12 +912,14 @@
go_strings_files = \
go/strings/reader.go \
go/strings/replace.go \
+ go/strings/search.go \
go/strings/strings.go
go_sync_files = \
go/sync/cond.go \
go/sync/mutex.go \
go/sync/once.go \
+ go/sync/race0.go \
go/sync/runtime.go \
go/sync/rwmutex.go \
go/sync/waitgroup.go
@@ -930,11 +962,28 @@
go/unicode/letter.go \
go/unicode/tables.go
+if LIBGO_IS_LINUX
+archive_tar_atim_file = go/archive/tar/stat_atim.go
+endif
+if LIBGO_IS_OPENBSD
+archive_tar_atim_file = go/archive/tar/stat_atim.go
+endif
+if LIBGO_IS_DARWIN
+archive_tar_atim_file = go/archive/tar/stat_atimespec.go
+endif
+if LIBGO_IS_FREEBSD
+archive_tar_atim_file = go/archive/tar/stat_atimespec.go
+endif
+if LIBGO_IS_NETBSD
+archive_tar_atim_file = go/archive/tar/stat_atimespec.go
+endif
go_archive_tar_files = \
go/archive/tar/common.go \
go/archive/tar/reader.go \
- go/archive/tar/writer.go
+ go/archive/tar/stat_unix.go \
+ go/archive/tar/writer.go \
+ $(archive_tar_atim_file)
go_archive_zip_files = \
go/archive/zip/reader.go \
@@ -948,6 +997,7 @@
go/compress/bzip2/move_to_front.go
go_compress_flate_files = \
+ go/compress/flate/copy.go \
go/compress/flate/deflate.go \
go/compress/flate/huffman_bit_writer.go \
go/compress/flate/huffman_code.go \
@@ -979,6 +1029,7 @@
go_crypto_aes_files = \
go/crypto/aes/block.go \
go/crypto/aes/cipher.go \
+ go/crypto/aes/cipher_generic.go \
go/crypto/aes/const.go
go_crypto_cipher_files = \
go/crypto/cipher/cbc.go \
@@ -1033,9 +1084,11 @@
go/crypto/tls/handshake_server.go \
go/crypto/tls/key_agreement.go \
go/crypto/tls/prf.go \
+ go/crypto/tls/ticket.go \
go/crypto/tls/tls.go
go_crypto_x509_files = \
go/crypto/x509/cert_pool.go \
+ go/crypto/x509/pem_decrypt.go \
go/crypto/x509/pkcs1.go \
go/crypto/x509/pkcs8.go \
go/crypto/x509/root.go \
@@ -1130,8 +1183,26 @@
go/exp/html/parse.go \
go/exp/html/render.go \
go/exp/html/token.go
+go_exp_html_atom_files = \
+ go/exp/html/atom/atom.go \
+ go/exp/html/atom/table.go
go_exp_inotify_files = \
go/exp/inotify/inotify_linux.go
+go_exp_locale_collate_files = \
+ go/exp/locale/collate/colelem.go \
+ go/exp/locale/collate/collate.go \
+ go/exp/locale/collate/contract.go \
+ go/exp/locale/collate/export.go \
+ go/exp/locale/collate/table.go \
+ go/exp/locale/collate/tables.go \
+ go/exp/locale/collate/trie.go
+go_exp_locale_collate_build_files = \
+ go/exp/locale/collate/build/builder.go \
+ go/exp/locale/collate/build/colelem.go \
+ go/exp/locale/collate/build/contract.go \
+ go/exp/locale/collate/build/order.go \
+ go/exp/locale/collate/build/table.go \
+ go/exp/locale/collate/build/trie.go
go_exp_norm_files = \
go/exp/norm/composition.go \
go/exp/norm/forminfo.go \
@@ -1161,6 +1232,7 @@
go_go_ast_files = \
go/go/ast/ast.go \
+ go/go/ast/commentmap.go \
go/go/ast/filter.go \
go/go/ast/import.go \
go/go/ast/print.go \
@@ -1170,6 +1242,7 @@
go_go_build_files = \
go/go/build/build.go \
go/go/build/doc.go \
+ go/go/build/read.go \
syslist.go
go_go_doc_files = \
go/go/doc/comment.go \
@@ -1235,6 +1308,7 @@
go/image/jpeg/writer.go
go_image_png_files = \
+ go/image/png/paeth.go \
go/image/png/reader.go \
go/image/png/writer.go
@@ -1243,6 +1317,7 @@
go/index/suffixarray/suffixarray.go
go_io_ioutil_files = \
+ go/io/ioutil/blackhole.go \
go/io/ioutil/ioutil.go \
go/io/ioutil/tempfile.go
@@ -1358,6 +1433,7 @@
go_regexp_syntax_files = \
go/regexp/syntax/compile.go \
+ go/regexp/syntax/doc.go \
go/regexp/syntax/parse.go \
go/regexp/syntax/perl_groups.go \
go/regexp/syntax/prog.go \
@@ -1544,6 +1620,7 @@
go/syscall/syscall_errno.go \
go/syscall/libcall_support.go \
go/syscall/libcall_posix.go \
+ go/syscall/race0.go \
go/syscall/socket.go \
go/syscall/sockcmsg_unix.go \
go/syscall/str.go \
@@ -1714,6 +1791,9 @@
encoding/xml.lo \
exp/ebnf.lo \
exp/html.lo \
+ exp/html/atom.lo \
+ exp/locale/collate.lo \
+ exp/locale/collate/build.lo \
exp/norm.lo \
exp/proxy.lo \
exp/terminal.lo \
@@ -2562,6 +2642,33 @@
@$(CHECK)
.PHONY: exp/html/check
+@go_include@ exp/html/atom.lo.dep
+exp/html/atom.lo.dep: $(go_exp_html_atom_files)
+ $(BUILDDEPS)
+exp/html/atom.lo: $(go_exp_html_atom_files)
+ $(BUILDPACKAGE)
+exp/html/atom/check: $(CHECK_DEPS)
+ @$(CHECK)
+.PHONY: exp/html/atom/check
+
+@go_include@ exp/locale/collate.lo.dep
+exp/locale/collate.lo.dep: $(go_exp_locale_collate_files)
+ $(BUILDDEPS)
+exp/locale/collate.lo: $(go_exp_locale_collate_files)
+ $(BUILDPACKAGE)
+exp/locale/collate/check: $(CHECK_DEPS)
+ @$(CHECK)
+.PHONY: exp/locale/collate/check
+
+@go_include@ exp/locale/collate/build.lo.dep
+exp/locale/collate/build.lo.dep: $(go_exp_locale_collate_build_files)
+ $(BUILDDEPS)
+exp/locale/collate/build.lo: $(go_exp_locale_collate_build_files)
+ $(BUILDPACKAGE)
+exp/locale/collate/build/check: $(CHECK_DEPS)
+ @$(CHECK)
+.PHONY: exp/locale/collate/build/check
+
@go_include@ exp/norm.lo.dep
exp/norm.lo.dep: $(go_exp_norm_files)
$(BUILDDEPS)
@@ -3142,6 +3249,9 @@
syscall/wait.lo: go/syscall/wait.c
@$(MKDIR_P) syscall
$(LTCOMPILE) -c -o $@ $<
+syscall/check: $(CHECK_DEPS)
+ @$(CHECK)
+.PHONY: syscall/check
# How to build a .gox file from a .lo file.
BUILDGOX = \
@@ -3310,8 +3420,14 @@
$(BUILDGOX)
exp/html.gox: exp/html.lo
$(BUILDGOX)
+exp/html/atom.gox: exp/html/atom.lo
+ $(BUILDGOX)
exp/inotify.gox: exp/inotify.lo
$(BUILDGOX)
+exp/locale/collate.gox: exp/locale/collate.lo
+ $(BUILDGOX)
+exp/locale/collate/build.gox: exp/locale/collate/build.lo
+ $(BUILDGOX)
exp/norm.gox: exp/norm.lo
$(BUILDGOX)
exp/proxy.gox: exp/proxy.lo
@@ -3484,6 +3600,7 @@
strconv/check \
strings/check \
sync/check \
+ syscall/check \
time/check \
unicode/check \
archive/tar/check \
@@ -3532,10 +3649,14 @@
encoding/xml/check \
exp/ebnf/check \
exp/html/check \
+ exp/html/atom/check \
$(exp_inotify_check) \
+ exp/locale/collate/check \
+ exp/locale/collate/build/check \
exp/norm/check \
exp/proxy/check \
exp/terminal/check \
+ exp/types/check \
exp/utf8string/check \
html/template/check \
go/ast/check \
@@ -129,6 +129,7 @@
is_irix=no
is_linux=no
is_netbsd=no
+is_openbsd=no
is_rtems=no
is_solaris=no
GOOS=unknown
@@ -138,6 +139,7 @@
*-*-irix6*) is_irix=yes; GOOS=irix ;;
*-*-linux*) is_linux=yes; GOOS=linux ;;
*-*-netbsd*) is_netbsd=yes; GOOS=netbsd ;;
+ *-*-openbsd*) is_openbsd=yes; GOOS=openbsd ;;
*-*-rtems*) is_rtems=yes; GOOS=rtems ;;
*-*-solaris2*) is_solaris=yes; GOOS=solaris ;;
esac
@@ -146,6 +148,7 @@
AM_CONDITIONAL(LIBGO_IS_IRIX, test $is_irix = yes)
AM_CONDITIONAL(LIBGO_IS_LINUX, test $is_linux = yes)
AM_CONDITIONAL(LIBGO_IS_NETBSD, test $is_netbsd = yes)
+AM_CONDITIONAL(LIBGO_IS_OPENBSD, test $is_openbsd = yes)
AM_CONDITIONAL(LIBGO_IS_RTEMS, test $is_rtems = yes)
AM_CONDITIONAL(LIBGO_IS_SOLARIS, test $is_solaris = yes)
AC_SUBST(GOOS)
@@ -4,8 +4,9 @@
#include "runtime.h"
#include "arch.h"
+#include "go-type.h"
+#include "race.h"
#include "malloc.h"
-#include "go-type.h"
#define NOSELGEN 1
@@ -24,6 +25,7 @@
G* g; // g and selgen constitute
uint32 selgen; // a weak pointer to g
SudoG* link;
+ int64 releasetime;
byte* elem; // data element
};
@@ -35,13 +37,13 @@
struct Hchan
{
- uint32 qcount; // total data in the q
- uint32 dataqsiz; // size of the circular q
+ uintgo qcount; // total data in the q
+ uintgo dataqsiz; // size of the circular q
uint16 elemsize;
bool closed;
uint8 elemalign;
- uint32 sendx; // send index
- uint32 recvx; // receive index
+ uintgo sendx; // send index
+ uintgo recvx; // receive index
WaitQ recvq; // list of recv waiters
WaitQ sendq; // list of send waiters
Lock;
@@ -80,17 +82,22 @@
static void dequeueg(WaitQ*);
static SudoG* dequeue(WaitQ*);
static void enqueue(WaitQ*, SudoG*);
+static void racesync(Hchan*, SudoG*);
Hchan*
runtime_makechan_c(ChanType *t, int64 hint)
{
Hchan *c;
- int32 n;
+ uintptr n;
const Type *elem;
elem = t->__element_type;
- if(hint < 0 || (int32)hint != hint || (elem->__size > 0 && (uintptr)hint > MaxMem / elem->__size))
+ // compiler checks this but be safe.
+ if(elem->__size >= (1<<16))
+ runtime_throw("makechan: invalid channel element type");
+
+ if(hint < 0 || (intgo)hint != hint || (elem->__size > 0 && (uintptr)hint > MaxMem / elem->__size))
runtime_panicstring("makechan: size out of range");
n = sizeof(*c);
@@ -102,19 +109,19 @@
c->dataqsiz = hint;
if(debug)
- runtime_printf("makechan: chan=%p; elemsize=%D; elemalign=%d; dataqsiz=%d\n",
- c, (int64)elem->__size, elem->__align, c->dataqsiz);
+ runtime_printf("makechan: chan=%p; elemsize=%D; elemalign=%d; dataqsiz=%D\n",
+ c, (int64)elem->__size, elem->__align, (int64)c->dataqsiz);
return c;
}
// For reflect
-// func makechan(typ *ChanType, size uint32) (chan)
-uintptr reflect_makechan(ChanType *, uint32)
+// func makechan(typ *ChanType, size uint64) (chan)
+uintptr reflect_makechan(ChanType *, uint64)
asm ("reflect.makechan");
uintptr
-reflect_makechan(ChanType *t, uint32 size)
+reflect_makechan(ChanType *t, uint64 size)
{
void *ret;
Hchan *c;
@@ -153,11 +160,12 @@
* the operation; we'll see that it's now closed.
*/
void
-runtime_chansend(ChanType *t, Hchan *c, byte *ep, bool *pres)
+runtime_chansend(ChanType *t, Hchan *c, byte *ep, bool *pres, void *pc)
{
SudoG *sg;
SudoG mysg;
G* gp;
+ int64 t0;
G* g;
g = runtime_g();
@@ -168,9 +176,7 @@
*pres = false;
return;
}
- g->status = Gwaiting;
- g->waitreason = "chan send (nil chan)";
- runtime_gosched();
+ runtime_park(nil, nil, "chan send (nil chan)");
return; // not reached
}
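
The change above is the first instance of a pattern repeated throughout this file: the hand-rolled blocking sequence (set g->status and g->waitreason, unlock, call runtime_gosched) is folded into a single runtime_park call taking the unlock callback, the lock, and the wait reason. A representative before/after, extracted from the chan-send path further down in this patch:

/* before */
g->status = Gwaiting;
g->waitreason = "chan send";
enqueue(&c->sendq, &mysg);
runtime_unlock(c);
runtime_gosched();

/* after */
enqueue(&c->sendq, &mysg);
runtime_park(runtime_unlock, c, "chan send");
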
@@ -181,7 +187,17 @@
runtime_printf("chansend: chan=%p\n", c);
}
+ t0 = 0;
+ mysg.releasetime = 0;
+ if(runtime_blockprofilerate > 0) {
+ t0 = runtime_cputicks();
+ mysg.releasetime = -1;
+ }
+
runtime_lock(c);
+ // TODO(dvyukov): add similar instrumentation to select.
+ if(raceenabled)
+ runtime_racereadpc(c, pc);
if(c->closed)
goto closed;
@@ -190,12 +206,16 @@
sg = dequeue(&c->recvq);
if(sg != nil) {
+ if(raceenabled)
+ racesync(c, sg);
runtime_unlock(c);
gp = sg->g;
gp->param = sg;
if(sg->elem != nil)
runtime_memmove(sg->elem, ep, c->elemsize);
+ if(sg->releasetime)
+ sg->releasetime = runtime_cputicks();
runtime_ready(gp);
if(pres != nil)
@@ -213,11 +233,8 @@
mysg.g = g;
mysg.selgen = NOSELGEN;
g->param = nil;
- g->status = Gwaiting;
- g->waitreason = "chan send";
enqueue(&c->sendq, &mysg);
- runtime_unlock(c);
- runtime_gosched();
+ runtime_park(runtime_unlock, c, "chan send");
if(g->param == nil) {
runtime_lock(c);
@@ -226,6 +243,9 @@
goto closed;
}
+ if(mysg.releasetime > 0)
+ runtime_blockevent(mysg.releasetime - t0, 2);
+
return;
asynch:
@@ -241,15 +261,16 @@
mysg.g = g;
mysg.elem = nil;
mysg.selgen = NOSELGEN;
- g->status = Gwaiting;
- g->waitreason = "chan send";
enqueue(&c->sendq, &mysg);
- runtime_unlock(c);
- runtime_gosched();
+ runtime_park(runtime_unlock, c, "chan send");
runtime_lock(c);
goto asynch;
}
+
+ if(raceenabled)
+ runtime_racerelease(chanbuf(c, c->sendx));
+
runtime_memmove(chanbuf(c, c->sendx), ep, c->elemsize);
if(++c->sendx == c->dataqsiz)
c->sendx = 0;
@@ -259,11 +280,15 @@
if(sg != nil) {
gp = sg->g;
runtime_unlock(c);
+ if(sg->releasetime)
+ sg->releasetime = runtime_cputicks();
runtime_ready(gp);
} else
runtime_unlock(c);
if(pres != nil)
*pres = true;
+ if(mysg.releasetime > 0)
+ runtime_blockevent(mysg.releasetime - t0, 2);
return;
closed:
@@ -278,6 +303,7 @@
SudoG *sg;
SudoG mysg;
G *gp;
+ int64 t0;
G *g;
if(runtime_gcwaiting)
@@ -294,12 +320,17 @@
*selected = false;
return;
}
- g->status = Gwaiting;
- g->waitreason = "chan receive (nil chan)";
- runtime_gosched();
+ runtime_park(nil, nil, "chan receive (nil chan)");
return; // not reached
}
+ t0 = 0;
+ mysg.releasetime = 0;
+ if(runtime_blockprofilerate > 0) {
+ t0 = runtime_cputicks();
+ mysg.releasetime = -1;
+ }
+
runtime_lock(c);
if(c->dataqsiz > 0)
goto asynch;
@@ -309,12 +340,16 @@
sg = dequeue(&c->sendq);
if(sg != nil) {
+ if(raceenabled)
+ racesync(c, sg);
runtime_unlock(c);
if(ep != nil)
runtime_memmove(ep, sg->elem, c->elemsize);
gp = sg->g;
gp->param = sg;
+ if(sg->releasetime)
+ sg->releasetime = runtime_cputicks();
runtime_ready(gp);
if(selected != nil)
@@ -334,11 +369,8 @@
mysg.g = g;
mysg.selgen = NOSELGEN;
g->param = nil;
- g->status = Gwaiting;
- g->waitreason = "chan receive";
enqueue(&c->recvq, &mysg);
- runtime_unlock(c);
- runtime_gosched();
+ runtime_park(runtime_unlock, c, "chan receive");
if(g->param == nil) {
runtime_lock(c);
@@ -349,6 +381,8 @@
if(received != nil)
*received = true;
+ if(mysg.releasetime > 0)
+ runtime_blockevent(mysg.releasetime - t0, 2);
return;
asynch:
@@ -366,15 +400,16 @@
mysg.g = g;
mysg.elem = nil;
mysg.selgen = NOSELGEN;
- g->status = Gwaiting;
- g->waitreason = "chan receive";
enqueue(&c->recvq, &mysg);
- runtime_unlock(c);
- runtime_gosched();
+ runtime_park(runtime_unlock, c, "chan receive");
runtime_lock(c);
goto asynch;
}
+
+ if(raceenabled)
+ runtime_raceacquire(chanbuf(c, c->recvx));
+
if(ep != nil)
runtime_memmove(ep, chanbuf(c, c->recvx), c->elemsize);
runtime_memclr(chanbuf(c, c->recvx), c->elemsize);
@@ -386,6 +421,8 @@
if(sg != nil) {
gp = sg->g;
runtime_unlock(c);
+ if(sg->releasetime)
+ sg->releasetime = runtime_cputicks();
runtime_ready(gp);
} else
runtime_unlock(c);
@@ -394,6 +431,8 @@
*selected = true;
if(received != nil)
*received = true;
+ if(mysg.releasetime > 0)
+ runtime_blockevent(mysg.releasetime - t0, 2);
return;
closed:
@@ -403,7 +442,11 @@
*selected = true;
if(received != nil)
*received = false;
+ if(raceenabled)
+ runtime_raceacquire(c);
runtime_unlock(c);
+ if(mysg.releasetime > 0)
+ runtime_blockevent(mysg.releasetime - t0, 2);
}
// The compiler generates a call to __go_send_small to send a value 8
@@ -424,7 +467,7 @@
#else
p = u.b + sizeof(uint64) - t->__element_type->__size;
#endif
- runtime_chansend(t, c, p, nil);
+ runtime_chansend(t, c, p, nil, runtime_getcallerpc(&t));
}
// The compiler generates a call to __go_send_big to send a value
@@ -432,7 +475,7 @@
void
__go_send_big(ChanType *t, Hchan* c, byte* p)
{
- runtime_chansend(t, c, p, nil);
+ runtime_chansend(t, c, p, nil, runtime_getcallerpc(&t));
}
// The compiler generates a call to __go_receive_small to receive a
@@ -500,7 +543,7 @@
{
bool res;
- runtime_chansend(t, c, p, &res);
+ runtime_chansend(t, c, p, &res, runtime_getcallerpc(&t));
return res;
}
@@ -590,7 +633,7 @@
vp = (byte*)&val;
else
vp = (byte*)val;
- runtime_chansend(t, c, vp, sp);
+ runtime_chansend(t, c, vp, sp, runtime_getcallerpc(&t));
return selected;
}
@@ -643,10 +686,10 @@
// newselect(size uint32) (sel *byte);
-void* runtime_newselect(int) __asm__("runtime.newselect");
+void* runtime_newselect(int32) __asm__("runtime.newselect");
void*
-runtime_newselect(int size)
+runtime_newselect(int32 size)
{
Select *sel;
@@ -688,11 +731,11 @@
// selectsend(sel *byte, hchan *chan any, elem *any) (selected bool);
-void runtime_selectsend(Select *, Hchan *, void *, int)
+void runtime_selectsend(Select *, Hchan *, void *, int32)
__asm__("runtime.selectsend");
void
-runtime_selectsend(Select *sel, Hchan *c, void *elem, int index)
+runtime_selectsend(Select *sel, Hchan *c, void *elem, int32 index)
{
// nil cases do not compete
if(c == nil)
@@ -728,11 +771,11 @@
// selectrecv(sel *byte, hchan *chan any, elem *any) (selected bool);
-void runtime_selectrecv(Select *, Hchan *, void *, int)
+void runtime_selectrecv(Select *, Hchan *, void *, int32)
__asm__("runtime.selectrecv");
void
-runtime_selectrecv(Select *sel, Hchan *c, void *elem, int index)
+runtime_selectrecv(Select *sel, Hchan *c, void *elem, int32 index)
{
// nil cases do not compete
if(c == nil)
@@ -743,11 +786,11 @@
// selectrecv2(sel *byte, hchan *chan any, elem *any, received *bool) (selected bool);
-void runtime_selectrecv2(Select *, Hchan *, void *, bool *, int)
+void runtime_selectrecv2(Select *, Hchan *, void *, bool *, int32)
__asm__("runtime.selectrecv2");
void
-runtime_selectrecv2(Select *sel, Hchan *c, void *elem, bool *received, int index)
+runtime_selectrecv2(Select *sel, Hchan *c, void *elem, bool *received, int32 index)
{
// nil cases do not compete
if(c == nil)
@@ -784,16 +827,16 @@
// selectdefault(sel *byte) (selected bool);
-void runtime_selectdefault(Select *, int) __asm__("runtime.selectdefault");
+void runtime_selectdefault(Select *, int32) __asm__("runtime.selectdefault");
void
-runtime_selectdefault(Select *sel, int index)
+runtime_selectdefault(Select *sel, int32 index)
{
selectdefault(sel, index);
}
static void
-selectdefault(Select *sel, int index)
+selectdefault(Select *sel, int32 index)
{
int32 i;
Scase *cas;
@@ -848,12 +891,7 @@
void
runtime_block(void)
{
- G *g;
-
- g = runtime_g();
- g->status = Gwaiting; // forever
- g->waitreason = "select (no cases)";
- runtime_gosched();
+ runtime_park(nil, nil, "select (no cases)"); // forever
}
static int selectgo(Select**);
@@ -985,10 +1023,7 @@
}
g->param = nil;
- g->status = Gwaiting;
- g->waitreason = "select";
- selunlock(sel);
- runtime_gosched();
+ runtime_park((void(*)(Lock*))selunlock, (Lock*)sel, "select");
sellock(sel);
sg = g->param;
@@ -1029,6 +1064,8 @@
asyncrecv:
// can receive from buffer
+ if(raceenabled)
+ runtime_raceacquire(chanbuf(c, c->recvx));
if(cas->receivedp != nil)
*cas->receivedp = true;
if(cas->sg.elem != nil)
@@ -1049,6 +1086,8 @@
asyncsend:
// can send to buffer
+ if(raceenabled)
+ runtime_racerelease(chanbuf(c, c->sendx));
runtime_memmove(chanbuf(c, c->sendx), cas->sg.elem, c->elemsize);
if(++c->sendx == c->dataqsiz)
c->sendx = 0;
@@ -1065,6 +1104,8 @@
syncrecv:
// can receive from sleeping sender (sg)
+ if(raceenabled)
+ racesync(c, sg);
selunlock(sel);
if(debug)
runtime_printf("syncrecv: sel=%p c=%p o=%d\n", sel, c, o);
@@ -1084,10 +1125,14 @@
*cas->receivedp = false;
if(cas->sg.elem != nil)
runtime_memclr(cas->sg.elem, c->elemsize);
+ if(raceenabled)
+ runtime_raceacquire(c);
goto retc;
syncsend:
// can send to sleeping receiver (sg)
+ if(raceenabled)
+ racesync(c, sg);
selunlock(sel);
if(debug)
runtime_printf("syncsend: sel=%p c=%p o=%d\n", sel, c, o);
@@ -1110,6 +1155,102 @@
return 0; // not reached
}
+// This struct must match ../reflect/value.go:/runtimeSelect.
+typedef struct runtimeSelect runtimeSelect;
+struct runtimeSelect
+{
+ uintptr dir;
+ ChanType *typ;
+ Hchan *ch;
+ uintptr val;
+};
+
+// This enum must match ../reflect/value.go:/SelectDir.
+enum SelectDir {
+ SelectSend = 1,
+ SelectRecv,
+ SelectDefault,
+};
+
+struct rselect_ret {
+ intgo chosen;
+ uintptr word;
+ bool recvOK;
+};
+
+// func rselect(cases []runtimeSelect) (chosen int, word uintptr, recvOK bool)
+
+struct rselect_ret reflect_rselect(Slice)
+ asm("reflect.rselect");
+
+struct rselect_ret
+reflect_rselect(Slice cases)
+{
+ struct rselect_ret ret;
+ int32 i;
+ Select *sel;
+ runtimeSelect* rcase, *rc;
+ void *elem;
+ void *recvptr;
+ uintptr maxsize;
+ bool onlyptr;
+
+ ret.chosen = -1;
+ ret.word = 0;
+ ret.recvOK = false;
+
+ maxsize = 0;
+ onlyptr = true;
+ rcase = (runtimeSelect*)cases.__values;
+ for(i=0; i<cases.__count; i++) {
+ rc = &rcase[i];
+ if(rc->dir == SelectRecv && rc->ch != nil) {
+ if(maxsize < rc->typ->__element_type->__size)
+ maxsize = rc->typ->__element_type->__size;
+ if(!__go_is_pointer_type(rc->typ->__element_type))
+ onlyptr = false;
+ }
+ }
+
+ recvptr = nil;
+ if(!onlyptr)
+ recvptr = runtime_mal(maxsize);
+
+ newselect(cases.__count, &sel);
+ for(i=0; i<cases.__count; i++) {
+ rc = &rcase[i];
+ switch(rc->dir) {
+ case SelectDefault:
+ selectdefault(sel, i);
+ break;
+ case SelectSend:
+ if(rc->ch == nil)
+ break;
+ if(!__go_is_pointer_type(rc->typ->__element_type))
+ elem = (void*)rc->val;
+ else
+ elem = (void*)&rc->val;
+ selectsend(sel, rc->ch, i, elem);
+ break;
+ case SelectRecv:
+ if(rc->ch == nil)
+ break;
+ if(!__go_is_pointer_type(rc->typ->__element_type))
+ elem = recvptr;
+ else
+ elem = &ret.word;
+ selectrecv(sel, rc->ch, i, elem, &ret.recvOK);
+ break;
+ }
+ }
+
+ ret.chosen = (intgo)(uintptr)selectgo(&sel);
+ if(rcase[ret.chosen].dir == SelectRecv && !__go_is_pointer_type(rcase[ret.chosen].typ->__element_type))
+ ret.word = (uintptr)recvptr;
+
+ return ret;
+}
+
// closechan(sel *byte);
void
runtime_closechan(Hchan *c)
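
A note on how reflect_rselect above interprets runtimeSelect.val: for pointer-shaped element types the word itself is the value, so the runtime takes &rc->val; for all other element types val holds the address of the data (and non-pointer receives go through the shared recvptr scratch buffer). A condensed restatement as a hypothetical helper; reflect_rselect performs this inline:

/* Hypothetical helper mirroring the SelectSend case above; not part of the patch. */
static void*
send_elem(runtimeSelect *rc)
{
	if(!__go_is_pointer_type(rc->typ->__element_type))
		return (void*)rc->val;	/* val is the address of the datum */
	return (void*)&rc->val;		/* val holds the pointer word itself */
}
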
@@ -1129,6 +1270,11 @@
runtime_panicstring("close of closed channel");
}
+ if(raceenabled) {
+ runtime_racewritepc(c, runtime_getcallerpc(&c));
+ runtime_racerelease(c);
+ }
+
c->closed = true;
// release all readers
@@ -1172,15 +1318,15 @@
}
// For reflect
-// func chanlen(c chan) (len int32)
+// func chanlen(c chan) (len int)
-int32 reflect_chanlen(uintptr) __asm__("reflect.chanlen");
+intgo reflect_chanlen(uintptr) __asm__("reflect.chanlen");
-int32
+intgo
reflect_chanlen(uintptr ca)
{
Hchan *c;
- int32 len;
+ intgo len;
c = (Hchan*)ca;
if(c == nil)
@@ -1190,22 +1336,22 @@
return len;
}
-int
+intgo
__go_chan_len(Hchan *c)
{
return reflect_chanlen((uintptr)c);
}
// For reflect
-// func chancap(c chan) (cap int32)
+// func chancap(c chan) (cap intgo)
-int32 reflect_chancap(uintptr) __asm__("reflect.chancap");
+intgo reflect_chancap(uintptr) __asm__("reflect.chancap");
-int32
+intgo
reflect_chancap(uintptr ca)
{
Hchan *c;
- int32 cap;
+ intgo cap;
c = (Hchan*)ca;
if(c == nil)
@@ -1215,7 +1361,7 @@
return cap;
}
-int
+intgo
__go_chan_cap(Hchan *c)
{
return reflect_chancap((uintptr)c);
@@ -1273,3 +1419,12 @@
q->last->link = sgp;
q->last = sgp;
}
+
+static void
+racesync(Hchan *c, SudoG *sg)
+{
+ runtime_racerelease(chanbuf(c, 0));
+ runtime_raceacquireg(sg->g, chanbuf(c, 0));
+ runtime_racereleaseg(sg->g, chanbuf(c, 0));
+ runtime_raceacquire(chanbuf(c, 0));
+}
@@ -130,7 +130,7 @@
// SetCPUProfileRate sets the CPU profiling rate.
// The user documentation is in debug.go.
void
-runtime_SetCPUProfileRate(int32 hz)
+runtime_SetCPUProfileRate(intgo hz)
{
uintptr *p;
uintptr n;
@@ -15,7 +15,7 @@
int
__go_get_rune (const unsigned char *str, size_t len, int *rune)
{
- int c, c1, c2, c3;
+ int c, c1, c2, c3, l;
/* Default to the "replacement character". */
*rune = 0xfffd;
@@ -37,8 +37,10 @@
if ((c & 0xe0) == 0xc0
&& (c1 & 0xc0) == 0x80)
{
- *rune = (((c & 0x1f) << 6)
- + (c1 & 0x3f));
+ l = (((c & 0x1f) << 6) + (c1 & 0x3f));
+ if (l <= 0x7f)
+ return 1;
+ *rune = l;
return 2;
}
@@ -50,17 +52,21 @@
&& (c1 & 0xc0) == 0x80
&& (c2 & 0xc0) == 0x80)
{
- *rune = (((c & 0xf) << 12)
- + ((c1 & 0x3f) << 6)
- + (c2 & 0x3f));
+ l = (((c & 0xf) << 12)
+ + ((c1 & 0x3f) << 6)
+ + (c2 & 0x3f));
- if (*rune >= 0xd800 && *rune < 0xe000)
+ if (l <= 0x7ff)
+ return 1;
+
+ if (l >= 0xd800 && l < 0xe000)
{
/* Invalid surrogate half; return replace character. */
- *rune = 0xfffd;
return 1;
}
+ *rune = l;
+
return 3;
}
@@ -73,10 +79,15 @@
&& (c2 & 0xc0) == 0x80
&& (c3 & 0xc0) == 0x80)
{
- *rune = (((c & 0x7) << 18)
- + ((c1 & 0x3f) << 12)
- + ((c2 & 0x3f) << 6)
- + (c3 & 0x3f));
+ l = (((c & 0x7) << 18)
+ + ((c1 & 0x3f) << 12)
+ + ((c2 & 0x3f) << 6)
+ + (c3 & 0x3f));
+
+ if (l <= 0xffff || l > 0x10ffff)
+ return 1;
+
+ *rune = l;
return 4;
}
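
The new checks above reject overlong encodings (a value that would have fit in a shorter sequence) and surrogate or out-of-range values, returning 1 so the caller consumes a single byte and keeps the 0xfffd replacement rune. A condensed restatement of the accepted ranges, as a hypothetical helper; the patch performs these checks inline:

/* Hypothetical helper: is decoded value l valid for an nbytes-long sequence?  */
static int
rune_ok (int l, int nbytes)
{
  if (nbytes == 2)
    return l > 0x7f;                                  /* encodes 0x80..0x7ff */
  if (nbytes == 3)
    return l > 0x7ff && (l < 0xd800 || l >= 0xe000);  /* no surrogate halves */
  if (nbytes == 4)
    return l > 0xffff && l <= 0x10ffff;               /* planes 1..16 only   */
  return 0;
}
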
@@ -138,6 +138,19 @@
#undef P
#undef D
+
+static int8 badsignal[] = "runtime: signal received on thread not created by Go.\n";
+
+static void
+runtime_badsignal(int32 sig)
+{
+ if (sig == SIGPROF) {
+ return; // Ignore SIGPROFs intended for a non-Go thread.
+ }
+ runtime_write(2, badsignal, sizeof badsignal - 1);
+ runtime_exit(1);
+}
+
/* Handle a signal, for cases where we don't panic. We can split the
stack here. */
@@ -146,6 +159,12 @@
{
int i;
+ if (runtime_m () == NULL)
+ {
+ runtime_badsignal (sig);
+ return;
+ }
+
#ifdef SIGPROF
if (sig == SIGPROF)
{
@@ -106,8 +106,8 @@
no other references to it. */
void
-runtime_trampoline_scan (void (*scan) (byte *, int64))
+runtime_trampoline_scan (void (*addroot) (byte *, uintptr))
{
if (trampoline_page != NULL)
- scan ((byte *) &trampoline_page, sizeof trampoline_page);
+ addroot ((byte *) &trampoline_page, sizeof trampoline_page);
}
@@ -0,0 +1,66 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Lock-free stack.
+
+#include "runtime.h"
+#include "arch.h"
+
+#if __SIZEOF_POINTER__ == 8
+// Amd64 uses 48-bit virtual addresses; the 47th bit is used as a kernel/user flag.
+// So we use the 17 most significant bits of a pointer as an ABA counter.
+# define PTR_BITS 47
+#else
+# define PTR_BITS 32
+#endif
+#define PTR_MASK ((1ull<<PTR_BITS)-1)
+
+void
+runtime_lfstackpush(uint64 *head, LFNode *node)
+{
+ uint64 old, new;
+
+ if((uintptr)node != ((uintptr)node&PTR_MASK)) {
+ runtime_printf("p=%p\n", node);
+ runtime_throw("runtime_lfstackpush: invalid pointer");
+ }
+
+ node->pushcnt++;
+ new = (uint64)(uintptr)node|(((uint64)node->pushcnt)<<PTR_BITS);
+ old = runtime_atomicload64(head);
+ for(;;) {
+ node->next = (LFNode*)(uintptr)(old&PTR_MASK);
+ if(runtime_cas64(head, &old, new))
+ break;
+ }
+}
+
+LFNode*
+runtime_lfstackpop(uint64 *head)
+{
+ LFNode *node, *node2;
+ uint64 old, new;
+
+ old = runtime_atomicload64(head);
+ for(;;) {
+ if(old == 0)
+ return nil;
+ node = (LFNode*)(uintptr)(old&PTR_MASK);
+ node2 = runtime_atomicloadp(&node->next);
+ new = 0;
+ if(node2 != nil)
+ new = (uint64)(uintptr)node2|(((uint64)node2->pushcnt)<<PTR_BITS);
+ if(runtime_cas64(head, &old, new))
+ return node;
+ }
+}
+
+LFNode* runtime_lfstackpop2(uint64*)
+ asm("runtime.lfstackpop2");
+
+LFNode*
+runtime_lfstackpop2(uint64 *head)
+{
+ return runtime_lfstackpop(head);
+}
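
The new lock-free stack packs each node pointer and its push counter into one uint64 so that runtime_cas64 can detect ABA reuse of a node. A minimal standalone sketch of the packing arithmetic (assuming the 64-bit layout with PTR_BITS == 47; on 32-bit targets PTR_BITS is 32, leaving 32 bits for the counter):

#include <stdint.h>

#define SKETCH_PTR_BITS 47
#define SKETCH_PTR_MASK ((1ull << SKETCH_PTR_BITS) - 1)

static uint64_t
pack (void *node, uint32_t pushcnt)
{
  /* Low PTR_BITS bits carry the pointer; the remaining high bits carry the
     (truncated) push counter, so a recycled node re-pushed under a different
     counter still fails the compare-and-swap even if the address matches.  */
  return (uint64_t) (uintptr_t) node | ((uint64_t) pushcnt << SKETCH_PTR_BITS);
}

static void *
unpack (uint64_t word)
{
  return (void *) (uintptr_t) (word & SKETCH_PTR_MASK);
}
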
@@ -17,12 +17,13 @@
#include "go-string.h"
#include "interface.h"
#include "go-type.h"
+#include "race.h"
MHeap runtime_mheap;
extern MStats mstats; // defined in extern.go
-extern volatile int32 runtime_MemProfileRate
+extern volatile intgo runtime_MemProfileRate
__asm__ ("runtime.MemProfileRate");
// Allocate an object of at least size bytes.
@@ -33,7 +34,8 @@
{
M *m;
G *g;
- int32 sizeclass, rate;
+ int32 sizeclass;
+ intgo rate;
MCache *c;
uintptr npages;
MSpan *s;
@@ -53,6 +55,9 @@
if(size == 0)
size = 1;
+ if(DebugTypeAtBlockEnd)
+ size += sizeof(uintptr);
+
c = m->mcache;
c->local_nmalloc++;
if(size <= MaxSmallSize) {
@@ -72,7 +77,7 @@
npages = size >> PageShift;
if((size & PageMask) != 0)
npages++;
- s = runtime_MHeap_Alloc(&runtime_mheap, npages, 0, 1);
+ s = runtime_MHeap_Alloc(&runtime_mheap, npages, 0, 1, zeroed);
if(s == nil)
runtime_throw("out of memory");
size = npages<<PageShift;
@@ -83,9 +88,20 @@
// setup for mark sweep
runtime_markspan(v, 0, 0, true);
}
+
+ if (sizeof(void*) == 4 && c->local_total_alloc >= (1<<30)) {
+ // purge cache stats to prevent overflow
+ runtime_lock(&runtime_mheap);
+ runtime_purgecachedstats(c);
+ runtime_unlock(&runtime_mheap);
+ }
+
if(!(flag & FlagNoGC))
runtime_markallocated(v, size, (flag&FlagNoPointers) != 0);
+ if(DebugTypeAtBlockEnd)
+ *(uintptr*)((uintptr)v+size-sizeof(uintptr)) = 0;
+
m->mallocing = 0;
if(!(flag & FlagNoProfiling) && (rate = runtime_MemProfileRate) > 0) {
@@ -107,6 +123,11 @@
if(dogc && mstats.heap_alloc >= mstats.next_gc)
runtime_gc(0);
+
+ if(raceenabled) {
+ runtime_racemalloc(v, size, m->racepc);
+ m->racepc = nil;
+ }
return v;
}
@@ -144,6 +165,9 @@
}
prof = runtime_blockspecial(v);
+ if(raceenabled)
+ runtime_racefree(v);
+
// Find size class for v.
sizeclass = s->sizeclass;
c = m->mcache;
@@ -178,11 +202,21 @@
int32
runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
{
+ M *m;
uintptr n, i;
byte *p;
MSpan *s;
- runtime_m()->mcache->local_nlookup++;
+ m = runtime_m();
+
+ m->mcache->local_nlookup++;
+ if (sizeof(void*) == 4 && m->mcache->local_nlookup >= (1<<30)) {
+ // purge cache stats to prevent overflow
+ runtime_lock(&runtime_mheap);
+ runtime_purgecachedstats(m->mcache);
+ runtime_unlock(&runtime_mheap);
+ }
+
s = runtime_MHeap_LookupMaybe(&runtime_mheap, v);
if(sp)
*sp = s;
@@ -210,7 +244,7 @@
return 0;
}
- n = runtime_class_to_size[s->sizeclass];
+ n = s->elemsize;
if(base) {
i = ((byte*)v - p)/n;
*base = p + i*n;
@@ -224,7 +258,7 @@
MCache*
runtime_allocmcache(void)
{
- int32 rate;
+ intgo rate;
MCache *c;
runtime_lock(&runtime_mheap);
@@ -232,6 +266,7 @@
mstats.mcache_inuse = runtime_mheap.cachealloc.inuse;
mstats.mcache_sys = runtime_mheap.cachealloc.sys;
runtime_unlock(&runtime_mheap);
+ runtime_memclr((byte*)c, sizeof(*c));
// Set first allocation sample size.
rate = runtime_MemProfileRate;
@@ -244,12 +279,19 @@
}
void
-runtime_purgecachedstats(M* m)
+runtime_freemcache(MCache *c)
{
- MCache *c;
+ runtime_MCache_ReleaseAll(c);
+ runtime_lock(&runtime_mheap);
+ runtime_purgecachedstats(c);
+ runtime_FixAlloc_Free(&runtime_mheap.cachealloc, c);
+ runtime_unlock(&runtime_mheap);
+}
+void
+runtime_purgecachedstats(MCache *c)
+{
// Protected by either heap or GC lock.
- c = m->mcache;
mstats.heap_alloc += c->local_cachealloc;
c->local_cachealloc = 0;
mstats.heap_objects += c->local_objects;
@@ -445,6 +487,220 @@
return p;
}
+static Lock settype_lock;
+
+void
+runtime_settype_flush(M *m, bool sysalloc)
+{
+ uintptr *buf, *endbuf;
+ uintptr size, ofs, j, t;
+ uintptr ntypes, nbytes2, nbytes3;
+ uintptr *data2;
+ byte *data3;
+ bool sysalloc3;
+ void *v;
+ uintptr typ, p;
+ MSpan *s;
+
+ buf = m->settype_buf;
+ endbuf = buf + m->settype_bufsize;
+
+ runtime_lock(&settype_lock);
+ while(buf < endbuf) {
+ v = (void*)*buf;
+ *buf = 0;
+ buf++;
+ typ = *buf;
+ buf++;
+
+ // (Manually inlined copy of runtime_MHeap_Lookup)
+ p = (uintptr)v>>PageShift;
+ if(sizeof(void*) == 8)
+ p -= (uintptr)runtime_mheap.arena_start >> PageShift;
+ s = runtime_mheap.map[p];
+
+ if(s->sizeclass == 0) {
+ s->types.compression = MTypes_Single;
+ s->types.data = typ;
+ continue;
+ }
+
+ size = s->elemsize;
+ ofs = ((uintptr)v - (s->start<<PageShift)) / size;
+
+ switch(s->types.compression) {
+ case MTypes_Empty:
+ ntypes = (s->npages << PageShift) / size;
+ nbytes3 = 8*sizeof(uintptr) + 1*ntypes;
+
+ if(!sysalloc) {
+ data3 = runtime_mallocgc(nbytes3, FlagNoPointers, 0, 1);
+ } else {
+ data3 = runtime_SysAlloc(nbytes3);
+ if(0) runtime_printf("settype(0->3): SysAlloc(%x) --> %p\n", (uint32)nbytes3, data3);
+ }
+
+ s->types.compression = MTypes_Bytes;
+ s->types.sysalloc = sysalloc;
+ s->types.data = (uintptr)data3;
+
+ ((uintptr*)data3)[1] = typ;
+ data3[8*sizeof(uintptr) + ofs] = 1;
+ break;
+
+ case MTypes_Words:
+ ((uintptr*)s->types.data)[ofs] = typ;
+ break;
+
+ case MTypes_Bytes:
+ data3 = (byte*)s->types.data;
+ for(j=1; j<8; j++) {
+ if(((uintptr*)data3)[j] == typ) {
+ break;
+ }
+ if(((uintptr*)data3)[j] == 0) {
+ ((uintptr*)data3)[j] = typ;
+ break;
+ }
+ }
+ if(j < 8) {
+ data3[8*sizeof(uintptr) + ofs] = j;
+ } else {
+ ntypes = (s->npages << PageShift) / size;
+ nbytes2 = ntypes * sizeof(uintptr);
+
+ if(!sysalloc) {
+ data2 = runtime_mallocgc(nbytes2, FlagNoPointers, 0, 1);
+ } else {
+ data2 = runtime_SysAlloc(nbytes2);
+ if(0) runtime_printf("settype.(3->2): SysAlloc(%x) --> %p\n", (uint32)nbytes2, data2);
+ }
+
+ sysalloc3 = s->types.sysalloc;
+
+ s->types.compression = MTypes_Words;
+ s->types.sysalloc = sysalloc;
+ s->types.data = (uintptr)data2;
+
+ // Move the contents of data3 to data2. Then deallocate data3.
+ for(j=0; j<ntypes; j++) {
+ t = data3[8*sizeof(uintptr) + j];
+ t = ((uintptr*)data3)[t];
+ data2[j] = t;
+ }
+ if(sysalloc3) {
+ nbytes3 = 8*sizeof(uintptr) + 1*ntypes;
+ if(0) runtime_printf("settype.(3->2): SysFree(%p,%x)\n", data3, (uint32)nbytes3);
+ runtime_SysFree(data3, nbytes3);
+ }
+
+ data2[ofs] = typ;
+ }
+ break;
+ }
+ }
+ runtime_unlock(&settype_lock);
+
+ m->settype_bufsize = 0;
+}
+
+// It is forbidden to use this function if it is possible that
+// explicit deallocation via calling runtime_free(v) may happen.
+void
+runtime_settype(void *v, uintptr t)
+{
+ M *m1;
+ uintptr *buf;
+ uintptr i;
+ MSpan *s;
+
+ if(t == 0)
+ runtime_throw("settype: zero type");
+
+ m1 = runtime_m();
+ buf = m1->settype_buf;
+ i = m1->settype_bufsize;
+ buf[i+0] = (uintptr)v;
+ buf[i+1] = t;
+ i += 2;
+ m1->settype_bufsize = i;
+
+ if(i == nelem(m1->settype_buf)) {
+ runtime_settype_flush(m1, false);
+ }
+
+ if(DebugTypeAtBlockEnd) {
+ s = runtime_MHeap_Lookup(&runtime_mheap, v);
+ *(uintptr*)((uintptr)v+s->elemsize-sizeof(uintptr)) = t;
+ }
+}
+
+void
+runtime_settype_sysfree(MSpan *s)
+{
+ uintptr ntypes, nbytes;
+
+ if(!s->types.sysalloc)
+ return;
+
+ nbytes = (uintptr)-1;
+
+ switch (s->types.compression) {
+ case MTypes_Words:
+ ntypes = (s->npages << PageShift) / s->elemsize;
+ nbytes = ntypes * sizeof(uintptr);
+ break;
+ case MTypes_Bytes:
+ ntypes = (s->npages << PageShift) / s->elemsize;
+ nbytes = 8*sizeof(uintptr) + 1*ntypes;
+ break;
+ }
+
+ if(nbytes != (uintptr)-1) {
+ if(0) runtime_printf("settype: SysFree(%p,%x)\n", (void*)s->types.data, (uint32)nbytes);
+ runtime_SysFree((void*)s->types.data, nbytes);
+ }
+}
+
+uintptr
+runtime_gettype(void *v)
+{
+ MSpan *s;
+ uintptr t, ofs;
+ byte *data;
+
+ s = runtime_MHeap_LookupMaybe(&runtime_mheap, v);
+ if(s != nil) {
+ t = 0;
+ switch(s->types.compression) {
+ case MTypes_Empty:
+ break;
+ case MTypes_Single:
+ t = s->types.data;
+ break;
+ case MTypes_Words:
+ ofs = (uintptr)v - (s->start<<PageShift);
+ t = ((uintptr*)s->types.data)[ofs/s->elemsize];
+ break;
+ case MTypes_Bytes:
+ ofs = (uintptr)v - (s->start<<PageShift);
+ data = (byte*)s->types.data;
+ t = data[8*sizeof(uintptr) + ofs/s->elemsize];
+ t = ((uintptr*)data)[t];
+ break;
+ default:
+ runtime_throw("runtime_gettype: invalid compression kind");
+ }
+ if(0) {
+ runtime_lock(&settype_lock);
+ runtime_printf("%p -> %d,%X\n", v, (int32)s->types.compression, (int64)t);
+ runtime_unlock(&settype_lock);
+ }
+ return t;
+ }
+ return 0;
+}
+
// Runtime stubs.
void*
@@ -453,9 +709,24 @@
return runtime_mallocgc(n, 0, 1, 1);
}
-func new(typ *Type) (ret *uint8) {
- uint32 flag = typ->__code&GO_NO_POINTERS ? FlagNoPointers : 0;
+void *
+runtime_new(Type *typ)
+{
+ void *ret;
+ uint32 flag;
+
+ runtime_m()->racepc = runtime_getcallerpc(&typ);
+ flag = typ->__code&GO_NO_POINTERS ? FlagNoPointers : 0;
ret = runtime_mallocgc(typ->__size, flag, 1, 1);
+
+ if(UseSpanType && !flag) {
+ if(false) {
+ runtime_printf("new %S: %p\n", *typ->__reflection, ret);
+ }
+ runtime_settype(ret, (uintptr)typ | TypeInfo_SingleObject);
+ }
+
+ return ret;
}
func GC() {
@@ -85,6 +85,7 @@
typedef struct MSpan MSpan;
typedef struct MStats MStats;
typedef struct MLink MLink;
+typedef struct MTypes MTypes;
enum
{
@@ -124,8 +125,8 @@
// Max number of threads to run garbage collection.
// 2, 3, and 4 are all plausible maximums depending
// on the hardware details of the machine. The garbage
- // collector scales well to 4 cpus.
- MaxGcproc = 4,
+ // collector scales well to 8 cpus.
+ MaxGcproc = 8,
};
// Maximum memory allocation size, a hint for callers.
@@ -282,19 +283,19 @@
struct MCache
{
MCacheList list[NumSizeClasses];
- uint64 size;
- int64 local_cachealloc; // bytes allocated (or freed) from cache since last lock of heap
- int64 local_objects; // objects allocated (or freed) from cache since last lock of heap
- int64 local_alloc; // bytes allocated (or freed) since last lock of heap
- int64 local_total_alloc; // bytes allocated (even if freed) since last lock of heap
- int64 local_nmalloc; // number of mallocs since last lock of heap
- int64 local_nfree; // number of frees since last lock of heap
- int64 local_nlookup; // number of pointer lookups since last lock of heap
+ uintptr size;
+ intptr local_cachealloc; // bytes allocated (or freed) from cache since last lock of heap
+ intptr local_objects; // objects allocated (or freed) from cache since last lock of heap
+ intptr local_alloc; // bytes allocated (or freed) since last lock of heap
+ uintptr local_total_alloc; // bytes allocated (even if freed) since last lock of heap
+ uintptr local_nmalloc; // number of mallocs since last lock of heap
+ uintptr local_nfree; // number of frees since last lock of heap
+ uintptr local_nlookup; // number of pointer lookups since last lock of heap
int32 next_sample; // trigger heap sample after allocating this many bytes
// Statistics about allocation size classes since last lock of heap
struct {
- int64 nmalloc;
- int64 nfree;
+ uintptr nmalloc;
+ uintptr nfree;
} local_by_size[NumSizeClasses];
};
@@ -303,6 +304,44 @@
void runtime_MCache_Free(MCache *c, void *p, int32 sizeclass, uintptr size);
void runtime_MCache_ReleaseAll(MCache *c);
+// MTypes describes the types of blocks allocated within a span.
+// The compression field describes the layout of the data.
+//
+// MTypes_Empty:
+// All blocks are free, or no type information is available for
+// allocated blocks.
+// The data field has no meaning.
+// MTypes_Single:
+// The span contains just one block.
+// The data field holds the type information.
+// The sysalloc field has no meaning.
+// MTypes_Words:
+// The span contains multiple blocks.
+// The data field points to an array of type [NumBlocks]uintptr,
+// and each element of the array holds the type of the corresponding
+// block.
+// MTypes_Bytes:
+// The span contains at most seven different types of blocks.
+// The data field points to the following structure:
+// struct {
+// type [8]uintptr // type[0] is always 0
+// index [NumBlocks]byte
+// }
+// The type of the i-th block is: data.type[data.index[i]]
+enum
+{
+ MTypes_Empty = 0,
+ MTypes_Single = 1,
+ MTypes_Words = 2,
+ MTypes_Bytes = 3,
+};
+struct MTypes
+{
+ byte compression; // one of MTypes_*
+ bool sysalloc; // whether (void*)data is from runtime_SysAlloc
+ uintptr data;
+};
+
// An MSpan is a run of pages.
enum
{
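
The MTypes_Bytes layout described above can be read back as follows; this sketch (not part of the patch, using the runtime's byte/uintptr typedefs) mirrors the lookup performed by runtime_gettype() in the malloc changes earlier in this patch:

/* Type of the i-th block in a span whose types.compression == MTypes_Bytes. */
static uintptr
mtypes_bytes_lookup(MTypes *t, uintptr i)
{
	byte *data;
	byte slot;

	data = (byte*)t->data;
	slot = data[8*sizeof(uintptr) + i];	/* index[i], a value in 0..7     */
	return ((uintptr*)data)[slot];		/* type[slot]; slot 0 is always 0 */
}
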
@@ -315,16 +354,17 @@
{
MSpan *next; // in a span linked list
MSpan *prev; // in a span linked list
- MSpan *allnext; // in the list of all spans
PageID start; // starting page number
uintptr npages; // number of pages in span
MLink *freelist; // list of free objects
uint32 ref; // number of allocated objects in this span
uint32 sizeclass; // size class
+ uintptr elemsize; // computed from sizeclass or from npages
uint32 state; // MSpanInUse etc
int64 unusedsince; // First time spotted by GC in MSpanFree state
uintptr npreleased; // number of pages released to the OS
byte *limit; // end of data in span
+ MTypes types; // types of allocated objects in this span
};
void runtime_MSpan_Init(MSpan *span, PageID start, uintptr npages);
@@ -351,6 +391,7 @@
void runtime_MCentral_Init(MCentral *c, int32 sizeclass);
int32 runtime_MCentral_AllocList(MCentral *c, int32 n, MLink **first);
void runtime_MCentral_FreeList(MCentral *c, int32 n, MLink *first);
+void runtime_MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end);
// Main malloc heap.
// The heap itself is the "free[]" and "large" arrays,
@@ -360,7 +401,9 @@
Lock;
MSpan free[MaxMHeapList]; // free lists of given length
MSpan large; // free lists length >= MaxMHeapList
- MSpan *allspans;
+ MSpan **allspans;
+ uint32 nspan;
+ uint32 nspancap;
// span lookup
MSpan *map[1<<MHeapMap_Bits];
@@ -387,7 +430,7 @@
extern MHeap runtime_mheap;
void runtime_MHeap_Init(MHeap *h, void *(*allocator)(uintptr));
-MSpan* runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct);
+MSpan* runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct, int32 zeroed);
void runtime_MHeap_Free(MHeap *h, MSpan *s, int32 acct);
MSpan* runtime_MHeap_Lookup(MHeap *h, void *v);
MSpan* runtime_MHeap_LookupMaybe(MHeap *h, void *v);
@@ -408,7 +451,12 @@
void runtime_unmarkspan(void *v, uintptr size);
bool runtime_blockspecial(void*);
void runtime_setblockspecial(void*, bool);
-void runtime_purgecachedstats(M*);
+void runtime_purgecachedstats(MCache*);
+
+void runtime_settype(void*, uintptr);
+void runtime_settype_flush(M*, bool);
+void runtime_settype_sysfree(MSpan*);
+uintptr runtime_gettype(void*);
enum
{
@@ -421,10 +469,21 @@
void runtime_MProf_Malloc(void*, uintptr);
void runtime_MProf_Free(void*, uintptr);
void runtime_MProf_GC(void);
-void runtime_MProf_Mark(void (*scan)(byte *, int64));
-int32 runtime_helpgc(bool*);
+void runtime_MProf_Mark(void (*addroot)(byte *, uintptr));
+int32 runtime_gcprocs(void);
+void runtime_helpgc(int32 nproc);
void runtime_gchelper(void);
struct __go_func_type;
bool runtime_getfinalizer(void *p, bool del, void (**fn)(void*), const struct __go_func_type **ft);
-void runtime_walkfintab(void (*fn)(void*), void (*scan)(byte *, int64));
+void runtime_walkfintab(void (*fn)(void*), void (*scan)(byte *, uintptr));
+
+enum
+{
+ TypeInfo_SingleObject = 0,
+ TypeInfo_Array = 1,
+ TypeInfo_Map = 2,
+
+ // Enables type information at the end of blocks allocated from heap
+ DebugTypeAtBlockEnd = 0,
+};
@@ -43,11 +43,6 @@
// block is zeroed iff second word is zero ...
if(size > sizeof(uintptr) && ((uintptr*)v)[1] != 0)
runtime_memclr((byte*)v, size);
- else {
- // ... except for the link pointer
- // that we used above; zero that.
- v->next = nil;
- }
}
c->local_cachealloc += size;
c->local_objects++;
@@ -88,9 +88,6 @@
}
// Free n objects back into the central free list.
-// Return the number of objects allocated.
-// The objects are linked together by their first words.
-// On return, *pstart points at the first object and *pend at the last.
void
runtime_MCentral_FreeList(MCentral *c, int32 n, MLink *start)
{
@@ -148,6 +145,42 @@
}
}
+// Free n objects from a span s back into the central free list c.
+// Called from GC.
+void
+runtime_MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end)
+{
+ int32 size;
+
+ runtime_lock(c);
+
+ // Move to nonempty if necessary.
+ if(s->freelist == nil) {
+ runtime_MSpanList_Remove(s);
+ runtime_MSpanList_Insert(&c->nonempty, s);
+ }
+
+ // Add the objects back to s's free list.
+ end->next = s->freelist;
+ s->freelist = start;
+ s->ref -= n;
+ c->nfree += n;
+
+ // If s is completely freed, return it to the heap.
+ if(s->ref == 0) {
+ size = runtime_class_to_size[c->sizeclass];
+ runtime_MSpanList_Remove(s);
+ *(uintptr*)(s->start<<PageShift) = 1; // needs zeroing
+ s->freelist = nil;
+ c->nfree -= (s->npages << PageShift) / size;
+ runtime_unlock(c);
+ runtime_unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
+ runtime_MHeap_Free(&runtime_mheap, s, 0);
+ } else {
+ runtime_unlock(c);
+ }
+}
+
void
runtime_MGetSizeClassInfo(int32 sizeclass, uintptr *sizep, int32 *npagesp, int32 *nobj)
{
@@ -174,7 +207,7 @@
runtime_unlock(c);
runtime_MGetSizeClassInfo(c->sizeclass, &size, &npages, &n);
- s = runtime_MHeap_Alloc(&runtime_mheap, npages, c->sizeclass, 0);
+ s = runtime_MHeap_Alloc(&runtime_mheap, npages, c->sizeclass, 0, 1);
if(s == nil) {
// TODO(rsc): Log out of memory
runtime_lock(c);
@@ -193,7 +193,7 @@
}
void
-runtime_walkfintab(void (*fn)(void*), void (*scan)(byte *, int64))
+runtime_walkfintab(void (*fn)(void*), void (*addroot)(byte *, uintptr))
{
void **key;
void **ekey;
@@ -206,8 +206,8 @@
for(; key < ekey; key++)
if(*key != nil && *key != ((void*)-1))
fn(*key);
- scan((byte*)&fintab[i].fkey, sizeof(void*));
- scan((byte*)&fintab[i].val, sizeof(void*));
+ addroot((byte*)&fintab[i].fkey, sizeof(void*));
+ addroot((byte*)&fintab[i].val, sizeof(void*));
runtime_unlock(&fintab[i]);
}
}
@@ -9,6 +9,7 @@
#include "runtime.h"
#include "arch.h"
#include "malloc.h"
+#include "race.h"
#ifdef USING_SPLIT_STACK
@@ -22,8 +23,8 @@
enum {
Debug = 0,
- PtrSize = sizeof(void*),
DebugMark = 0, // run second pass to check mark
+ DataBlock = 8*1024,
// Four bits per word (see #defines below).
wordsPerBitmapWord = sizeof(void*)*8/4,
@@ -78,17 +79,14 @@
//
uint32 runtime_worldsema = 1;
-// TODO: Make these per-M.
-static uint64 nhandoff;
-
static int32 gctrace;
typedef struct Workbuf Workbuf;
struct Workbuf
{
- Workbuf *next;
+ LFNode node; // must be first
uintptr nobj;
- byte *obj[512-2];
+ byte *obj[512-(sizeof(LFNode)+sizeof(uintptr))/sizeof(byte*)];
};
typedef struct Finalizer Finalizer;
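
The new obj[] bound keeps sizeof(Workbuf) at 512 machine words, as the old 512-2 sizing did, now that the header is an LFNode plus the nobj count. Worked out under the assumption that LFNode is two pointer-sized fields (next and pushcnt, as used by the new lfstack.c):

/* 64-bit: 512 - (16 + 8)/8 = 509 obj entries; 3 header words + 509 = 512 words (4096 bytes)
   32-bit: 512 - ( 8 + 4)/4 = 509 obj entries; 3 header words + 509 = 512 words (2048 bytes) */
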
@@ -122,22 +120,32 @@
static void putempty(Workbuf*);
static Workbuf* handoff(Workbuf*);
+typedef struct GcRoot GcRoot;
+struct GcRoot
+{
+ byte *p;
+ uintptr n;
+};
+
static struct {
- Lock fmu;
- Workbuf *full;
- Lock emu;
- Workbuf *empty;
+ uint64 full; // lock-free list of full blocks
+ uint64 empty; // lock-free list of empty blocks
+ byte pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
uint32 nproc;
volatile uint32 nwait;
volatile uint32 ndone;
+ volatile uint32 debugmarkdone;
Note alldone;
- Lock markgate;
- Lock sweepgate;
- MSpan *spans;
+ ParFor *markfor;
+ ParFor *sweepfor;
Lock;
byte *chunk;
uintptr nchunk;
+
+ GcRoot *roots;
+ uint32 nroot;
+ uint32 rootcap;
} work;
// scanblock scans a block of n bytes starting at pointer b for references
@@ -147,7 +155,7 @@
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
static void
-scanblock(byte *b, int64 n)
+scanblock(byte *b, uintptr n)
{
byte *obj, *arena_start, *arena_used, *p;
void **vp;
@@ -158,8 +166,8 @@
Workbuf *wbuf;
bool keepworking;
- if((int64)(uintptr)n != n || n < 0) {
- runtime_printf("scanblock %p %D\n", b, n);
+ if((intptr)n < 0) {
+ runtime_printf("scanblock %p %D\n", b, (int64)n);
runtime_throw("scanblock");
}
@@ -173,7 +181,7 @@
nobj = 0; // number of queued objects
// Scanblock helpers pass b==nil.
- // The main proc needs to return to make more
+	// Procs need to return to make more
// calls to scanblock. But if work.nproc==1 then
// might as well process blocks as soon as we
// have them.
@@ -190,7 +198,7 @@
// Each iteration scans the block b of length n, queueing pointers in
// the work buffer.
if(Debug > 1)
- runtime_printf("scanblock %p %D\n", b, n);
+ runtime_printf("scanblock %p %D\n", b, (int64)n);
vp = (void**)b;
n >>= (2+PtrSize/8); /* n /= PtrSize (4 or 8) */
@@ -257,6 +265,14 @@
bits = xbits >> shift;
found:
+ // If another proc wants a pointer, give it some.
+ if(work.nwait > 0 && nobj > 4 && work.full == 0) {
+ wbuf->nobj = nobj;
+ wbuf = handoff(wbuf);
+ nobj = wbuf->nobj;
+ wp = (void**)(wbuf->obj + nobj);
+ }
+
// Now we have bits, bitp, and shift correct for
// obj pointing at the base of the object.
// Only care about allocated and not marked.
@@ -278,13 +294,7 @@
if((bits & bitNoPointers) != 0)
continue;
- // If another proc wants a pointer, give it some.
- if(nobj > 4 && work.nwait > 0 && work.full == nil) {
- wbuf->nobj = nobj;
- wbuf = handoff(wbuf);
- nobj = wbuf->nobj;
- wp = (void**)(wbuf->obj + nobj);
- }
+ PREFETCH(obj);
// If buffer is full, get a new one.
if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
@@ -305,7 +315,8 @@
// Fetch b from the work buffer.
if(nobj == 0) {
if(!keepworking) {
- putempty(wbuf);
+ if(wbuf)
+ putempty(wbuf);
return;
}
// Emptied our buffer: refill.
@@ -335,7 +346,7 @@
// it is simpler, slower, single-threaded, recursive,
// and uses bitSpecial as the mark bit.
static void
-debug_scanblock(byte *b, int64 n)
+debug_scanblock(byte *b, uintptr n)
{
byte *obj, *p;
void **vp;
@@ -345,8 +356,8 @@
if(!DebugMark)
runtime_throw("debug_scanblock without DebugMark");
- if((int64)(uintptr)n != n || n < 0) {
- runtime_printf("debug_scanblock %p %D\n", b, n);
+ if((intptr)n < 0) {
+ runtime_printf("debug_scanblock %p %D\n", b, (int64)n);
runtime_throw("debug_scanblock");
}
@@ -374,7 +385,6 @@
if(s == nil)
continue;
-
p = (byte*)((uintptr)s->start<<PageShift);
if(s->sizeclass == 0) {
obj = p;
@@ -411,53 +421,33 @@
}
}
+static void
+markroot(ParFor *desc, uint32 i)
+{
+ USED(&desc);
+ scanblock(work.roots[i].p, work.roots[i].n);
+}
+
// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
static Workbuf*
getempty(Workbuf *b)
{
- if(work.nproc == 1) {
- // Put b on full list.
- if(b != nil) {
- b->next = work.full;
- work.full = b;
+ if(b != nil)
+ runtime_lfstackpush(&work.full, &b->node);
+ b = (Workbuf*)runtime_lfstackpop(&work.empty);
+ if(b == nil) {
+ // Need to allocate.
+ runtime_lock(&work);
+ if(work.nchunk < sizeof *b) {
+ work.nchunk = 1<<20;
+ work.chunk = runtime_SysAlloc(work.nchunk);
}
- // Grab from empty list if possible.
- b = work.empty;
- if(b != nil) {
- work.empty = b->next;
- goto haveb;
- }
- } else {
- // Put b on full list.
- if(b != nil) {
- runtime_lock(&work.fmu);
- b->next = work.full;
- work.full = b;
- runtime_unlock(&work.fmu);
- }
- // Grab from empty list if possible.
- runtime_lock(&work.emu);
- b = work.empty;
- if(b != nil)
- work.empty = b->next;
- runtime_unlock(&work.emu);
- if(b != nil)
- goto haveb;
+ b = (Workbuf*)work.chunk;
+ work.chunk += sizeof *b;
+ work.nchunk -= sizeof *b;
+ runtime_unlock(&work);
}
-
- // Need to allocate.
- runtime_lock(&work);
- if(work.nchunk < sizeof *b) {
- work.nchunk = 1<<20;
- work.chunk = runtime_SysAlloc(work.nchunk);
- }
- b = (Workbuf*)work.chunk;
- work.chunk += sizeof *b;
- work.nchunk -= sizeof *b;
- runtime_unlock(&work);
-
-haveb:
b->nobj = 0;
return b;
}
@@ -465,112 +455,95 @@
static void
putempty(Workbuf *b)
{
- if(b == nil)
- return;
-
- if(work.nproc == 1) {
- b->next = work.empty;
- work.empty = b;
- return;
- }
-
- runtime_lock(&work.emu);
- b->next = work.empty;
- work.empty = b;
- runtime_unlock(&work.emu);
+ runtime_lfstackpush(&work.empty, &b->node);
}
// Get a full work buffer off the work.full list, or return nil.
static Workbuf*
getfull(Workbuf *b)
{
+ M *m;
int32 i;
- Workbuf *b1;
- if(work.nproc == 1) {
- // Put b on empty list.
- if(b != nil) {
- b->next = work.empty;
- work.empty = b;
- }
- // Grab from full list if possible.
- // Since work.nproc==1, no one else is
- // going to give us work.
- b = work.full;
- if(b != nil)
- work.full = b->next;
+ if(b != nil)
+ runtime_lfstackpush(&work.empty, &b->node);
+ b = (Workbuf*)runtime_lfstackpop(&work.full);
+ if(b != nil || work.nproc == 1)
return b;
- }
- putempty(b);
-
- // Grab buffer from full list if possible.
- for(;;) {
- b1 = work.full;
- if(b1 == nil)
- break;
- runtime_lock(&work.fmu);
- if(work.full != nil) {
- b1 = work.full;
- work.full = b1->next;
- runtime_unlock(&work.fmu);
- return b1;
- }
- runtime_unlock(&work.fmu);
- }
-
+ m = runtime_m();
runtime_xadd(&work.nwait, +1);
for(i=0;; i++) {
- b1 = work.full;
- if(b1 != nil) {
- runtime_lock(&work.fmu);
- if(work.full != nil) {
- runtime_xadd(&work.nwait, -1);
- b1 = work.full;
- work.full = b1->next;
- runtime_unlock(&work.fmu);
- return b1;
- }
- runtime_unlock(&work.fmu);
- continue;
+ if(work.full != 0) {
+ runtime_xadd(&work.nwait, -1);
+ b = (Workbuf*)runtime_lfstackpop(&work.full);
+ if(b != nil)
+ return b;
+ runtime_xadd(&work.nwait, +1);
}
if(work.nwait == work.nproc)
return nil;
- if(i < 10)
+ if(i < 10) {
+ m->gcstats.nprocyield++;
runtime_procyield(20);
- else if(i < 20)
+ } else if(i < 20) {
+ m->gcstats.nosyield++;
runtime_osyield();
- else
+ } else {
+ m->gcstats.nsleep++;
runtime_usleep(100);
+ }
}
}
static Workbuf*
handoff(Workbuf *b)
{
+ M *m;
int32 n;
Workbuf *b1;
+ m = runtime_m();
+
// Make new buffer with half of b's pointers.
b1 = getempty(nil);
n = b->nobj/2;
b->nobj -= n;
b1->nobj = n;
runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
- nhandoff += n;
+ m->gcstats.nhandoff++;
+ m->gcstats.nhandoffcnt += n;
// Put b on full list - let first half of b get stolen.
- runtime_lock(&work.fmu);
- b->next = work.full;
- work.full = b;
- runtime_unlock(&work.fmu);
-
+ runtime_lfstackpush(&work.full, &b->node);
return b1;
}
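
A minimal standalone sketch (not part of the patch) of the split performed by handoff above: half of a full buffer's pointers move into a fresh buffer for the current thread, while the shrunken original goes back on the full list to be stolen. The Buf type, NOBJ, and the sample values are illustrative assumptions.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

enum { NOBJ = 8 };

typedef struct {
    int nobj;
    uintptr_t obj[NOBJ];
} Buf;

static Buf
split(Buf *b)
{
    Buf b1 = {0};
    int n = b->nobj / 2;                 /* hand off half of the pointers */

    b->nobj -= n;
    b1.nobj = n;
    memcpy(b1.obj, b->obj + b->nobj, n * sizeof b1.obj[0]);
    return b1;
}

int
main(void)
{
    Buf b = { NOBJ, {1, 2, 3, 4, 5, 6, 7, 8} };
    Buf b1 = split(&b);

    printf("original keeps %d objects, new buffer takes %d (first taken: %lu)\n",
           b.nobj, b1.nobj, (unsigned long)b1.obj[0]);
    return 0;
}
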
-// Scanstack calls scanblock on each of gp's stack segments.
static void
-scanstack(void (*scanblock)(byte*, int64), G *gp)
+addroot(byte *p, uintptr n)
+{
+ uint32 cap;
+ GcRoot *new;
+
+ if(work.nroot >= work.rootcap) {
+ cap = PageSize/sizeof(GcRoot);
+ if(cap < 2*work.rootcap)
+ cap = 2*work.rootcap;
+ new = (GcRoot*)runtime_SysAlloc(cap*sizeof(GcRoot));
+ if(work.roots != nil) {
+ runtime_memmove(new, work.roots, work.rootcap*sizeof(GcRoot));
+ runtime_SysFree(work.roots, work.rootcap*sizeof(GcRoot));
+ }
+ work.roots = new;
+ work.rootcap = cap;
+ }
+ work.roots[work.nroot].p = p;
+ work.roots[work.nroot].n = n;
+ work.nroot++;
+}
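
addroot above grows the root array with a simple policy: start with one page worth of entries, then double on every overflow. A standalone sketch of that growth, assuming PageSize is 4096 and substituting malloc/free for runtime_SysAlloc/runtime_SysFree (error handling omitted):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

typedef struct { uint8_t *p; uintptr_t n; } Root;

static Root     *roots;
static uint32_t  nroot, rootcap;

static void
addroot(uint8_t *p, uintptr_t n)
{
    if (nroot >= rootcap) {
        uint32_t cap = 4096 / sizeof(Root);   /* start with one page worth */
        if (cap < 2 * rootcap)
            cap = 2 * rootcap;                /* then double on each growth */
        Root *grown = malloc(cap * sizeof(Root));  /* sketch: no OOM check */
        if (roots != NULL) {
            memcpy(grown, roots, rootcap * sizeof(Root));
            free(roots);
        }
        roots = grown;
        rootcap = cap;
    }
    roots[nroot].p = p;
    roots[nroot].n = n;
    nroot++;
}

int
main(void)
{
    static uint8_t blob[64];

    for (int i = 0; i < 1000; i++)
        addroot(blob, sizeof blob);
    printf("nroot=%u rootcap=%u\n", nroot, rootcap);
    return 0;
}
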
+
+static void
+addstackroots(G *gp)
{
#ifdef USING_SPLIT_STACK
M *mp;
@@ -609,11 +582,11 @@
}
}
if(sp != nil) {
- scanblock(sp, spsize);
+ addroot(sp, spsize);
while((sp = __splitstack_find(next_segment, next_sp,
&spsize, &next_segment,
&next_sp, &initial_sp)) != nil)
- scanblock(sp, spsize);
+ addroot(sp, spsize);
}
#else
M *mp;
@@ -635,16 +608,14 @@
}
top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
if(top > bottom)
- scanblock(bottom, top - bottom);
+ addroot(bottom, top - bottom);
else
- scanblock(top, bottom - top);
+ addroot(top, bottom - top);
#endif
}
-// Markfin calls scanblock on the blocks that have finalizers:
-// the things pointed at cannot be freed until the finalizers have run.
static void
-markfin(void *v)
+addfinroots(void *v)
{
uintptr size;
@@ -653,7 +624,7 @@
runtime_throw("mark - finalizer inconsistency");
// do not mark the finalizer block itself. just mark the things it points at.
- scanblock(v, size);
+ addroot(v, size);
}
static struct root_list* roots;
@@ -668,22 +639,15 @@
}
static void
-debug_markfin(void *v)
-{
- uintptr size;
-
- if(!runtime_mlookup(v, (byte**)&v, &size, nil))
- runtime_throw("debug_mark - finalizer inconsistency");
- debug_scanblock(v, size);
-}
-
-// Mark
-static void
-mark(void (*scan)(byte*, int64))
+addroots(void)
{
struct root_list *pl;
G *gp;
FinBlock *fb;
+ MSpan *s, **allspans;
+ uint32 spanidx;
+
+ work.nroot = 0;
// mark data+bss.
for(pl = roots; pl != nil; pl = pl->next) {
@@ -692,20 +656,36 @@
void *decl = pr->decl;
if(decl == nil)
break;
- scanblock(decl, pr->size);
+ addroot(decl, pr->size);
pr++;
}
}
- scan((byte*)&runtime_m0, sizeof runtime_m0);
- scan((byte*)&runtime_g0, sizeof runtime_g0);
- scan((byte*)&runtime_allg, sizeof runtime_allg);
- scan((byte*)&runtime_allm, sizeof runtime_allm);
- runtime_MProf_Mark(scan);
- runtime_time_scan(scan);
- runtime_trampoline_scan(scan);
+ addroot((byte*)&runtime_m0, sizeof runtime_m0);
+ addroot((byte*)&runtime_g0, sizeof runtime_g0);
+ addroot((byte*)&runtime_allg, sizeof runtime_allg);
+ addroot((byte*)&runtime_allm, sizeof runtime_allm);
+ runtime_MProf_Mark(addroot);
+ runtime_time_scan(addroot);
+ runtime_trampoline_scan(addroot);
- // mark stacks
+ // MSpan.types
+ allspans = runtime_mheap.allspans;
+ for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) {
+ s = allspans[spanidx];
+ if(s->state == MSpanInUse) {
+ switch(s->types.compression) {
+ case MTypes_Empty:
+ case MTypes_Single:
+ break;
+ case MTypes_Words:
+ case MTypes_Bytes:
+ addroot((byte*)&s->types.data, sizeof(void*));
+ break;
+ }
+ }
+ }
+
for(gp=runtime_allg; gp!=nil; gp=gp->alllink) {
switch(gp->status){
default:
@@ -716,27 +696,22 @@
case Grunning:
if(gp != runtime_g())
runtime_throw("mark - world not stopped");
- scanstack(scan, gp);
+ addstackroots(gp);
break;
case Grunnable:
case Gsyscall:
case Gwaiting:
- scanstack(scan, gp);
+ addstackroots(gp);
break;
}
}
- // mark things pointed at by objects with finalizers
- if(scan == debug_scanblock)
- runtime_walkfintab(debug_markfin, scan);
- else
- runtime_walkfintab(markfin, scan);
+ runtime_walkfintab(addfinroots, addroot);
for(fb=allfin; fb; fb=fb->alllink)
- scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));
+ addroot((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));
- // in multiproc mode, join in the queued work.
- scan(nil, 0);
+ addroot((byte*)&work, sizeof work);
}
static bool
@@ -771,122 +746,149 @@
f->fn = fn;
f->ft = ft;
f->arg = p;
- runtime_unlock(&finlock);
+ runtime_unlock(&finlock);
return true;
}
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
static void
-sweep(void)
+sweepspan(ParFor *desc, uint32 idx)
{
M *m;
- MSpan *s;
int32 cl, n, npages;
uintptr size;
byte *p;
MCache *c;
byte *arena_start;
- int64 now;
+ MLink head, *end;
+ int32 nfree;
+ byte *type_data;
+ byte compression;
+ uintptr type_data_inc;
+ MSpan *s;
m = runtime_m();
+
+ USED(&desc);
+ s = runtime_mheap.allspans[idx];
+ // Stamp newly unused spans. The scavenger will use that
+ // info to potentially give back some pages to the OS.
+ if(s->state == MSpanFree && s->unusedsince == 0)
+ s->unusedsince = runtime_nanotime();
+ if(s->state != MSpanInUse)
+ return;
arena_start = runtime_mheap.arena_start;
- now = runtime_nanotime();
+ p = (byte*)(s->start << PageShift);
+ cl = s->sizeclass;
+ size = s->elemsize;
+ if(cl == 0) {
+ n = 1;
+ } else {
+ // Chunk full of small blocks.
+ npages = runtime_class_to_allocnpages[cl];
+ n = (npages << PageShift) / size;
+ }
+ nfree = 0;
+ end = &head;
+ c = m->mcache;
+
+ type_data = (byte*)s->types.data;
+ type_data_inc = sizeof(uintptr);
+ compression = s->types.compression;
+ switch(compression) {
+ case MTypes_Bytes:
+ type_data += 8*sizeof(uintptr);
+ type_data_inc = 1;
+ break;
+ }
- for(;;) {
- s = work.spans;
- if(s == nil)
- break;
- if(!runtime_casp(&work.spans, s, s->allnext))
+ // Sweep through n objects of given size starting at p.
+ // This thread owns the span now, so it can manipulate
+ // the block bitmap without atomic operations.
+ for(; n > 0; n--, p += size, type_data+=type_data_inc) {
+ uintptr off, *bitp, shift, bits;
+
+ off = (uintptr*)p - (uintptr*)arena_start;
+ bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ bits = *bitp>>shift;
+
+ if((bits & bitAllocated) == 0)
continue;
- // Stamp newly unused spans. The scavenger will use that
- // info to potentially give back some pages to the OS.
- if(s->state == MSpanFree && s->unusedsince == 0)
- s->unusedsince = now;
-
- if(s->state != MSpanInUse)
+ if((bits & bitMarked) != 0) {
+ if(DebugMark) {
+ if(!(bits & bitSpecial))
+ runtime_printf("found spurious mark on %p\n", p);
+ *bitp &= ~(bitSpecial<<shift);
+ }
+ *bitp &= ~(bitMarked<<shift);
continue;
-
- p = (byte*)(s->start << PageShift);
- cl = s->sizeclass;
- if(cl == 0) {
- size = s->npages<<PageShift;
- n = 1;
- } else {
- // Chunk full of small blocks.
- size = runtime_class_to_size[cl];
- npages = runtime_class_to_allocnpages[cl];
- n = (npages << PageShift) / size;
}
- // Sweep through n objects of given size starting at p.
- // This thread owns the span now, so it can manipulate
- // the block bitmap without atomic operations.
- for(; n > 0; n--, p += size) {
- uintptr off, *bitp, shift, bits;
+ // Special means it has a finalizer or is being profiled.
+ // In DebugMark mode, the bit has been coopted so
+ // we have to assume all blocks are special.
+ if(DebugMark || (bits & bitSpecial) != 0) {
+ if(handlespecial(p, size))
+ continue;
+ }
- off = (uintptr*)p - (uintptr*)arena_start;
- bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
- shift = off % wordsPerBitmapWord;
- bits = *bitp>>shift;
+ // Mark freed; restore block boundary bit.
+ *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
- if((bits & bitAllocated) == 0)
- continue;
-
- if((bits & bitMarked) != 0) {
- if(DebugMark) {
- if(!(bits & bitSpecial))
- runtime_printf("found spurious mark on %p\n", p);
- *bitp &= ~(bitSpecial<<shift);
- }
- *bitp &= ~(bitMarked<<shift);
- continue;
- }
-
- // Special means it has a finalizer or is being profiled.
- // In DebugMark mode, the bit has been coopted so
- // we have to assume all blocks are special.
- if(DebugMark || (bits & bitSpecial) != 0) {
- if(handlespecial(p, size))
- continue;
- }
-
- // Mark freed; restore block boundary bit.
- *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
-
- c = m->mcache;
- if(s->sizeclass == 0) {
- // Free large span.
- runtime_unmarkspan(p, 1<<PageShift);
- *(uintptr*)p = 1; // needs zeroing
- runtime_MHeap_Free(&runtime_mheap, s, 1);
- } else {
- // Free small object.
- if(size > sizeof(uintptr))
- ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed"
- c->local_by_size[s->sizeclass].nfree++;
- runtime_MCache_Free(c, p, s->sizeclass, size);
- }
+ if(cl == 0) {
+ // Free large span.
+ runtime_unmarkspan(p, 1<<PageShift);
+ *(uintptr*)p = 1; // needs zeroing
+ runtime_MHeap_Free(&runtime_mheap, s, 1);
c->local_alloc -= size;
c->local_nfree++;
+ } else {
+ // Free small object.
+ switch(compression) {
+ case MTypes_Words:
+ *(uintptr*)type_data = 0;
+ break;
+ case MTypes_Bytes:
+ *(byte*)type_data = 0;
+ break;
+ }
+ if(size > sizeof(uintptr))
+ ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed"
+
+ end->next = (MLink*)p;
+ end = (MLink*)p;
+ nfree++;
}
}
+
+ if(nfree) {
+ c->local_by_size[cl].nfree += nfree;
+ c->local_alloc -= size * nfree;
+ c->local_nfree += nfree;
+ c->local_cachealloc -= nfree * size;
+ c->local_objects -= nfree;
+ runtime_MCentral_FreeSpan(&runtime_mheap.central[cl], s, nfree, head.next, end);
+ }
}
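
The sweep loop above finds an object's mark bits by walking backwards from arena_start: the bitmap sits just below the arena and grows downward, with several heap words sharing each bitmap word. A standalone sketch of that addressing; the four-bits-per-heap-word figure and the toy in-memory arena are assumptions for illustration only.

#include <stdio.h>
#include <stdint.h>

enum {
    gcBits = 4,                                     /* assumed bits recorded per heap word */
    wordsPerBitmapWord = sizeof(uintptr_t)*8 / gcBits,
};

static void
locate(uintptr_t *arena_start, uintptr_t *p, uintptr_t **bitp, uintptr_t *shift)
{
    uintptr_t off = p - arena_start;                /* word offset into the arena */
    *bitp  = arena_start - off/wordsPerBitmapWord - 1;
    *shift = off % wordsPerBitmapWord;
}

int
main(void)
{
    /* mem[0..15] stand in for the bitmap (just below the arena), the rest for the arena. */
    static uintptr_t mem[64];
    uintptr_t *arena_start = &mem[16];

    for (int i = 0; i < 3; i++) {
        uintptr_t *bitp, shift;
        locate(arena_start, arena_start + i*wordsPerBitmapWord + 1, &bitp, &shift);
        printf("heap word %d*wpbw+1 -> bitmap word mem[%td], shift %lu\n",
               i, bitp - mem, (unsigned long)shift);
    }
    return 0;
}
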
void
runtime_gchelper(void)
{
- // Wait until main proc is ready for mark help.
- runtime_lock(&work.markgate);
- runtime_unlock(&work.markgate);
+ // parallel mark over gc roots
+ runtime_parfordo(work.markfor);
+ // help other threads scan secondary blocks
scanblock(nil, 0);
- // Wait until main proc is ready for sweep help.
- runtime_lock(&work.sweepgate);
- runtime_unlock(&work.sweepgate);
- sweep();
+ if(DebugMark) {
+ // wait while the main thread executes mark(debug_scanblock)
+ while(runtime_atomicload(&work.debugmarkdone) == 0)
+ runtime_usleep(10);
+ }
+ runtime_parfordo(work.sweepfor);
if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
runtime_notewakeup(&work.alldone);
}
@@ -912,21 +914,31 @@
}
static void
-cachestats(void)
+cachestats(GCStats *stats)
{
M *m;
MCache *c;
uint32 i;
uint64 stacks_inuse;
uint64 stacks_sys;
+ uint64 *src, *dst;
+ if(stats)
+ runtime_memclr((byte*)stats, sizeof(*stats));
stacks_inuse = 0;
stacks_sys = runtime_stacks_sys;
for(m=runtime_allm; m; m=m->alllink) {
- runtime_purgecachedstats(m);
+ c = m->mcache;
+ runtime_purgecachedstats(c);
// stacks_inuse += m->stackalloc->inuse;
// stacks_sys += m->stackalloc->sys;
- c = m->mcache;
+ if(stats) {
+ src = (uint64*)&m->gcstats;
+ dst = (uint64*)stats;
+ for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
+ dst[i] += src[i];
+ runtime_memclr((byte*)&m->gcstats, sizeof(m->gcstats));
+ }
for(i=0; i<nelem(c->local_by_size); i++) {
mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
c->local_by_size[i].nmalloc = 0;
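
cachestats above merges every M's GCStats into one total by treating the struct as a flat uint64 array, which is why GCStats may contain only uint64 fields. A standalone sketch of that aggregation with made-up sample values:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

typedef struct {
    uint64_t nhandoff, nhandoffcnt, nprocyield, nosyield, nsleep;
} GCStats;

static void
merge(GCStats *dst, GCStats *src)
{
    uint64_t *d = (uint64_t*)dst, *s = (uint64_t*)src;
    size_t i;

    for (i = 0; i < sizeof(*dst)/sizeof(uint64_t); i++)
        d[i] += s[i];
    memset(src, 0, sizeof *src);        /* like the runtime_memclr of m->gcstats */
}

int
main(void)
{
    GCStats total = {0};
    GCStats m1 = {1, 10, 2, 0, 0}, m2 = {3, 40, 0, 1, 5};

    merge(&total, &m1);
    merge(&total, &m2);
    printf("nhandoff=%llu nhandoffcnt=%llu nsleep=%llu\n",
           (unsigned long long)total.nhandoff,
           (unsigned long long)total.nhandoffcnt,
           (unsigned long long)total.nsleep);
    return 0;
}
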
@@ -945,7 +957,15 @@
int64 t0, t1, t2, t3;
uint64 heap0, heap1, obj0, obj1;
const byte *p;
- bool extra;
+ GCStats stats;
+ M *m1;
+ uint32 i;
+
+ // The atomic operations are not atomic if the uint64s
+ // are not aligned on uint64 boundaries. This has been
+ // a problem in the past.
+ if((((uintptr)&work.empty) & 7) != 0)
+ runtime_throw("runtime: gc work buffer is misaligned");
// Make sure all registers are saved on stack so that
// scanstack sees them.
@@ -986,48 +1006,67 @@
}
t0 = runtime_nanotime();
- nhandoff = 0;
m->gcing = 1;
runtime_stoptheworld();
- cachestats();
- heap0 = mstats.heap_alloc;
- obj0 = mstats.nmalloc - mstats.nfree;
+ for(m1=runtime_allm; m1; m1=m1->alllink)
+ runtime_settype_flush(m1, false);
- runtime_lock(&work.markgate);
- runtime_lock(&work.sweepgate);
+ heap0 = 0;
+ obj0 = 0;
+ if(gctrace) {
+ cachestats(nil);
+ heap0 = mstats.heap_alloc;
+ obj0 = mstats.nmalloc - mstats.nfree;
+ }
- extra = false;
- work.nproc = 1;
- if(runtime_gomaxprocs > 1 && runtime_ncpu > 1) {
- runtime_noteclear(&work.alldone);
- work.nproc += runtime_helpgc(&extra);
- }
work.nwait = 0;
work.ndone = 0;
+ work.debugmarkdone = 0;
+ work.nproc = runtime_gcprocs();
+ addroots();
+ m->locks++; // disable gc during mallocs in parforalloc
+ if(work.markfor == nil)
+ work.markfor = runtime_parforalloc(MaxGcproc);
+ runtime_parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
+ if(work.sweepfor == nil)
+ work.sweepfor = runtime_parforalloc(MaxGcproc);
+ runtime_parforsetup(work.sweepfor, work.nproc, runtime_mheap.nspan, nil, true, sweepspan);
+ m->locks--;
+ if(work.nproc > 1) {
+ runtime_noteclear(&work.alldone);
+ runtime_helpgc(work.nproc);
+ }
- runtime_unlock(&work.markgate); // let the helpers in
- mark(scanblock);
- if(DebugMark)
- mark(debug_scanblock);
+ runtime_parfordo(work.markfor);
+ scanblock(nil, 0);
+
+ if(DebugMark) {
+ for(i=0; i<work.nroot; i++)
+ debug_scanblock(work.roots[i].p, work.roots[i].n);
+ runtime_atomicstore(&work.debugmarkdone, 1);
+ }
t1 = runtime_nanotime();
- work.spans = runtime_mheap.allspans;
- runtime_unlock(&work.sweepgate); // let the helpers in
- sweep();
- if(work.nproc > 1)
- runtime_notesleep(&work.alldone);
+ runtime_parfordo(work.sweepfor);
t2 = runtime_nanotime();
stealcache();
- cachestats();
+ cachestats(&stats);
+
+ if(work.nproc > 1)
+ runtime_notesleep(&work.alldone);
+
+ stats.nprocyield += work.sweepfor->nprocyield;
+ stats.nosyield += work.sweepfor->nosyield;
+ stats.nsleep += work.sweepfor->nsleep;
mstats.next_gc = mstats.heap_alloc+(mstats.heap_alloc-runtime_stacks_sys)*gcpercent/100;
m->gcing = 0;
- m->locks++; // disable gc during the mallocs in newproc
if(finq != nil) {
+ m->locks++; // disable gc during the mallocs in newproc
// kick off or wake up goroutine to run queued finalizers
if(fing == nil)
fing = __go_go(runfinq, nil);
@@ -1035,10 +1074,9 @@
fingwait = 0;
runtime_ready(fing);
}
+ m->locks--;
}
- m->locks--;
- cachestats();
heap1 = mstats.heap_alloc;
obj1 = mstats.nmalloc - mstats.nfree;
@@ -1051,26 +1089,22 @@
runtime_printf("pause %D\n", t3-t0);
if(gctrace) {
- runtime_printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects\n",
+ runtime_printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
+ " %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
heap0>>20, heap1>>20, obj0, obj1,
- mstats.nmalloc, mstats.nfree);
+ mstats.nmalloc, mstats.nfree,
+ stats.nhandoff, stats.nhandoffcnt,
+ work.sweepfor->nsteal, work.sweepfor->nstealcnt,
+ stats.nprocyield, stats.nosyield, stats.nsleep);
}
-
+
runtime_MProf_GC();
runtime_semrelease(&runtime_worldsema);
+ runtime_starttheworld();
- // If we could have used another helper proc, start one now,
- // in the hope that it will be available next time.
- // It would have been even better to start it before the collection,
- // but doing so requires allocating memory, so it's tricky to
- // coordinate. This lazy approach works out in practice:
- // we don't mind if the first couple gc rounds don't have quite
- // the maximum number of procs.
- runtime_starttheworld(extra);
-
- // give the queued finalizers, if any, a chance to run
- if(finq != nil)
+ // give the queued finalizers, if any, a chance to run
+ if(finq != nil)
runtime_gosched();
if(gctrace > 1 && !force)
@@ -1093,22 +1127,23 @@
m = runtime_m();
m->gcing = 1;
runtime_stoptheworld();
- cachestats();
+ cachestats(nil);
*stats = mstats;
m->gcing = 0;
runtime_semrelease(&runtime_worldsema);
- runtime_starttheworld(false);
+ runtime_starttheworld();
}
static void
runfinq(void* dummy __attribute__ ((unused)))
{
- G* gp;
Finalizer *f;
FinBlock *fb, *next;
uint32 i;
- gp = runtime_g();
+ if(raceenabled)
+ runtime_racefingo();
+
for(;;) {
// There's no need for a lock in this section
// because it only conflicts with the garbage
@@ -1120,9 +1155,7 @@
finq = nil;
if(fb == nil) {
fingwait = 1;
- gp->status = Gwaiting;
- gp->waitreason = "finalizer wait";
- runtime_gosched();
+ runtime_park(nil, nil, "finalizer wait");
continue;
}
for(; fb; fb=next) {
@@ -27,11 +27,24 @@
{
MHeap *h;
MSpan *s;
+ MSpan **all;
+ uint32 cap;
h = vh;
s = (MSpan*)p;
- s->allnext = h->allspans;
- h->allspans = s;
+ if(h->nspan >= h->nspancap) {
+ cap = 64*1024/sizeof(all[0]);
+ if(cap < h->nspancap*3/2)
+ cap = h->nspancap*3/2;
+ all = (MSpan**)runtime_SysAlloc(cap*sizeof(all[0]));
+ if(h->allspans) {
+ runtime_memmove(all, h->allspans, h->nspancap*sizeof(all[0]));
+ runtime_SysFree(h->allspans, h->nspancap*sizeof(all[0]));
+ }
+ h->allspans = all;
+ h->nspancap = cap;
+ }
+ h->allspans[h->nspan++] = s;
}
// Initialize the heap; fetch memory using alloc.
@@ -53,12 +66,12 @@
// Allocate a new span of npage pages from the heap
// and record its size class in the HeapMap and HeapMapCache.
MSpan*
-runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct)
+runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct, int32 zeroed)
{
MSpan *s;
runtime_lock(h);
- runtime_purgecachedstats(runtime_m());
+ runtime_purgecachedstats(runtime_m()->mcache);
s = MHeap_AllocLocked(h, npage, sizeclass);
if(s != nil) {
mstats.heap_inuse += npage<<PageShift;
@@ -68,6 +81,8 @@
}
}
runtime_unlock(h);
+ if(s != nil && *(uintptr*)(s->start<<PageShift) != 0 && zeroed)
+ runtime_memclr((byte*)(s->start<<PageShift), s->npages<<PageShift);
return s;
}
@@ -125,12 +140,11 @@
MHeap_FreeLocked(h, t);
}
- if(*(uintptr*)(s->start<<PageShift) != 0)
- runtime_memclr((byte*)(s->start<<PageShift), s->npages<<PageShift);
-
// Record span info, because gc needs to be
// able to map interior pointer to containing span.
s->sizeclass = sizeclass;
+ s->elemsize = (sizeclass==0 ? s->npages<<PageShift : (uintptr)runtime_class_to_size[sizeclass]);
+ s->types.compression = MTypes_Empty;
p = s->start;
if(sizeof(void*) == 8)
p -= ((uintptr)h->arena_start>>PageShift);
@@ -259,7 +273,7 @@
runtime_MHeap_Free(MHeap *h, MSpan *s, int32 acct)
{
runtime_lock(h);
- runtime_purgecachedstats(runtime_m());
+ runtime_purgecachedstats(runtime_m()->mcache);
mstats.heap_inuse -= s->npages<<PageShift;
if(acct) {
mstats.heap_alloc -= s->npages<<PageShift;
@@ -276,6 +290,10 @@
MSpan *t;
PageID p;
+ if(s->types.sysalloc)
+ runtime_settype_sysfree(s);
+ s->types.compression = MTypes_Empty;
+
if(s->state != MSpanInUse || s->ref != 0) {
runtime_printf("MHeap_FreeLocked - span %p ptr %p state %d ref %d\n", s, s->start<<PageShift, s->state, s->ref);
runtime_throw("MHeap_FreeLocked - invalid free");
@@ -416,9 +434,11 @@
span->freelist = nil;
span->ref = 0;
span->sizeclass = 0;
+ span->elemsize = 0;
span->state = 0;
span->unusedsince = 0;
span->npreleased = 0;
+ span->types.compression = MTypes_Empty;
}
// Initialize an empty doubly-linked list.
@@ -15,21 +15,35 @@
// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;
-// Per-call-stack allocation information.
+enum { MProf, BProf }; // profile types
+
+// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
Bucket *next; // next in hash list
- Bucket *allnext; // next in list of all buckets
- uintptr allocs;
- uintptr frees;
- uintptr alloc_bytes;
- uintptr free_bytes;
- uintptr recent_allocs; // since last gc
- uintptr recent_frees;
- uintptr recent_alloc_bytes;
- uintptr recent_free_bytes;
+ Bucket *allnext; // next in list of all mbuckets/bbuckets
+ int32 typ;
+ union
+ {
+ struct // typ == MProf
+ {
+ uintptr allocs;
+ uintptr frees;
+ uintptr alloc_bytes;
+ uintptr free_bytes;
+ uintptr recent_allocs; // since last gc
+ uintptr recent_frees;
+ uintptr recent_alloc_bytes;
+ uintptr recent_free_bytes;
+ };
+ struct // typ == BProf
+ {
+ int64 count;
+ int64 cycles;
+ };
+ };
uintptr hash;
uintptr nstk;
uintptr stk[1];
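
With the change above a single Bucket serves both profile kinds: typ selects whether the anonymous union holds the memory-profile counters or the blocking-profile counters. A standalone sketch of the layout (trimmed field list, sample values made up); like the patch, it relies on anonymous struct/union members, so it needs C11 or GNU C.

#include <stdio.h>
#include <stdint.h>

enum { MProf, BProf };

typedef struct Bucket Bucket;
struct Bucket {
    int32_t typ;
    union {
        struct {                          /* typ == MProf */
            uintptr_t allocs, frees;
            uintptr_t alloc_bytes, free_bytes;
        };
        struct {                          /* typ == BProf */
            int64_t count, cycles;
        };
    };
};

int
main(void)
{
    Bucket mb = { .typ = MProf };
    Bucket bb = { .typ = BProf };

    mb.allocs = 3;  mb.alloc_bytes = 3*64;
    bb.count  = 1;  bb.cycles = 123456;

    printf("mem bucket: %lu allocs, %lu bytes\n",
           (unsigned long)mb.allocs, (unsigned long)mb.alloc_bytes);
    printf("block bucket: %lld ops blocked for %lld cycles\n",
           (long long)bb.count, (long long)bb.cycles);
    return 0;
}
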
@@ -38,12 +52,13 @@
BuckHashSize = 179999,
};
static Bucket **buckhash;
-static Bucket *buckets;
+static Bucket *mbuckets; // memory profile buckets
+static Bucket *bbuckets; // blocking profile buckets
static uintptr bucketmem;
// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
-stkbucket(uintptr *stk, int32 nstk, bool alloc)
+stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
{
int32 i;
uintptr h;
@@ -66,7 +81,7 @@
i = h%BuckHashSize;
for(b = buckhash[i]; b; b=b->next)
- if(b->hash == h && b->nstk == (uintptr)nstk &&
+ if(b->typ == typ && b->hash == h && b->nstk == (uintptr)nstk &&
runtime_mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
return b;
@@ -76,12 +91,18 @@
b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
bucketmem += sizeof *b + nstk*sizeof stk[0];
runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
+ b->typ = typ;
b->hash = h;
b->nstk = nstk;
b->next = buckhash[i];
buckhash[i] = b;
- b->allnext = buckets;
- buckets = b;
+ if(typ == MProf) {
+ b->allnext = mbuckets;
+ mbuckets = b;
+ } else {
+ b->allnext = bbuckets;
+ bbuckets = b;
+ }
return b;
}
@@ -92,7 +113,7 @@
Bucket *b;
runtime_lock(&proflock);
- for(b=buckets; b; b=b->allnext) {
+ for(b=mbuckets; b; b=b->allnext) {
b->allocs += b->recent_allocs;
b->frees += b->recent_frees;
b->alloc_bytes += b->recent_alloc_bytes;
@@ -107,20 +128,26 @@
// Map from pointer to Bucket* that allocated it.
// Three levels:
-// Linked-list hash table for top N-20 bits.
-// Array index for next 13 bits.
-// Linked list for next 7 bits.
+// Linked-list hash table for top N-AddrHashShift bits.
+// Array index for next AddrDenseBits bits.
+// Linked list for next AddrHashShift-AddrDenseBits bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.
typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;
+enum {
+ AddrHashBits = 12, // good for 4GB of used address space
+ AddrHashShift = 20, // each AddrHash knows about 1MB of address space
+ AddrDenseBits = 8, // good for a profiling rate of 4096 bytes
+};
+
struct AddrHash
{
AddrHash *next; // next in top-level hash table linked list
uintptr addr; // addr>>20
- AddrEntry *dense[1<<13];
+ AddrEntry *dense[1<<AddrDenseBits];
};
struct AddrEntry
@@ -130,9 +157,6 @@
Bucket *b;
};
-enum {
- AddrHashBits = 12 // 1MB per entry, so good for 4GB of used address space
-};
static AddrHash *addrhash[1<<AddrHashBits];
static AddrEntry *addrfree;
static uintptr addrmem;
@@ -155,15 +179,15 @@
AddrHash *ah;
AddrEntry *e;
- h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
+ h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
for(ah=addrhash[h]; ah; ah=ah->next)
- if(ah->addr == (addr>>20))
+ if(ah->addr == (addr>>AddrHashShift))
goto found;
ah = runtime_mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
addrmem += sizeof *ah;
ah->next = addrhash[h];
- ah->addr = addr>>20;
+ ah->addr = addr>>AddrHashShift;
addrhash[h] = ah;
found:
@@ -175,9 +199,9 @@
e[63].next = nil;
}
addrfree = e->next;
- e->addr = (uint32)~(addr & ((1<<20)-1));
+ e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
e->b = b;
- h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20.
+ h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1); // entry in dense is top 8 bits of low 20.
e->next = ah->dense[h];
ah->dense[h] = e;
}
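
setaddrbucket above splits an address across the three levels described in the updated comment: the top bits (addr>>AddrHashShift) are hashed into the chained table, the next AddrDenseBits select a dense-array slot, and the low AddrHashShift bits (inverted) become the entry key. A standalone sketch of that decomposition; HashMultiplier is not visible in this hunk, so the value below is an assumed stand-in for illustration only.

#include <stdio.h>
#include <stdint.h>

enum {
    AddrHashBits  = 12,   /* 1<<12 top-level hash chains */
    AddrHashShift = 20,   /* each AddrHash covers 1MB of address space */
    AddrDenseBits = 8,    /* 1<<8 dense slots per AddrHash */
};

#define HashMultiplier 2654435769u   /* assumed stand-in, see lead-in */

int
main(void)
{
    uintptr_t addr = 0x12345678;

    uint32_t chain = (uint32_t)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
    uint32_t dense = (addr>>(AddrHashShift-AddrDenseBits)) & ((1<<AddrDenseBits)-1);
    uint32_t key   = (uint32_t)~(addr & ((1<<AddrHashShift)-1));

    printf("addr %#lx -> chain %u, dense slot %u, entry key %#x\n",
           (unsigned long)addr, chain, dense, key);
    return 0;
}
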
@@ -191,16 +215,16 @@
AddrEntry *e, **l;
Bucket *b;
- h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
+ h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
for(ah=addrhash[h]; ah; ah=ah->next)
- if(ah->addr == (addr>>20))
+ if(ah->addr == (addr>>AddrHashShift))
goto found;
return nil;
found:
- h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20.
+ h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1); // entry in dense is top 8 bits of low 20.
for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
- if(e->addr == (uint32)~(addr & ((1<<20)-1))) {
+ if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
*l = e->next;
b = e->b;
e->next = addrfree;
@@ -227,7 +251,7 @@
m->nomemprof++;
nstk = runtime_callers(1, stk, 32);
runtime_lock(&proflock);
- b = stkbucket(stk, nstk, true);
+ b = stkbucket(MProf, stk, nstk, true);
b->recent_allocs++;
b->recent_alloc_bytes += size;
setaddrbucket((uintptr)p, b);
@@ -259,6 +283,37 @@
m->nomemprof--;
}
+int64 runtime_blockprofilerate; // in CPU ticks
+
+void runtime_SetBlockProfileRate(intgo) asm("runtime.SetBlockProfileRate");
+
+void
+runtime_SetBlockProfileRate(intgo rate)
+{
+ runtime_atomicstore64((uint64*)&runtime_blockprofilerate, rate * runtime_tickspersecond() / (1000*1000*1000));
+}
+
+void
+runtime_blockevent(int64 cycles, int32 skip)
+{
+ int32 nstk;
+ int64 rate;
+ uintptr stk[32];
+ Bucket *b;
+
+ if(cycles <= 0)
+ return;
+ rate = runtime_atomicload64((uint64*)&runtime_blockprofilerate);
+ if(rate <= 0 || (rate > cycles && runtime_fastrand1()%rate > cycles))
+ return;
+
+ nstk = runtime_callers(skip, stk, 32);
+ runtime_lock(&proflock);
+ b = stkbucket(BProf, stk, nstk, true);
+ b->count++;
+ b->cycles += cycles;
+ runtime_unlock(&proflock);
+}
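
runtime_blockevent above samples blocking events: anything at least rate ticks long is always recorded, and shorter events are recorded with probability cycles/rate. A standalone sketch of that decision, with rand() standing in for runtime_fastrand1 and made-up numbers:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

static int
sampled(int64_t cycles, int64_t rate)
{
    if (cycles <= 0 || rate <= 0)
        return 0;
    if (rate > cycles && rand() % rate > cycles)
        return 0;                        /* short event lost the coin toss */
    return 1;
}

int
main(void)
{
    int64_t rate = 10000;                /* threshold in CPU ticks */
    int hits = 0, trials = 100000;

    srand(1);
    for (int i = 0; i < trials; i++)
        hits += sampled(2500, rate);     /* 2500-tick event: ~25% sampled */

    printf("sampled %d of %d short events (~%.1f%%)\n",
           hits, trials, 100.0*hits/trials);
    return 0;
}
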
// Go interface to profile data. (Declared in extern.go)
// Assumes Go sizeof(int) == sizeof(int32)
@@ -287,20 +342,20 @@
r->stk[i] = 0;
}
-func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) {
+func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
Bucket *b;
Record *r;
runtime_lock(&proflock);
n = 0;
- for(b=buckets; b; b=b->allnext)
+ for(b=mbuckets; b; b=b->allnext)
if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
n++;
ok = false;
if(n <= p.__count) {
ok = true;
r = (Record*)p.__values;
- for(b=buckets; b; b=b->allnext)
+ for(b=mbuckets; b; b=b->allnext)
if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
record(r++, b);
}
@@ -308,12 +363,46 @@
}
void
-runtime_MProf_Mark(void (*scan)(byte *, int64))
+runtime_MProf_Mark(void (*addroot)(byte *, uintptr))
{
// buckhash is not allocated via mallocgc.
- scan((byte*)&buckets, sizeof buckets);
- scan((byte*)&addrhash, sizeof addrhash);
- scan((byte*)&addrfree, sizeof addrfree);
+ addroot((byte*)&mbuckets, sizeof mbuckets);
+ addroot((byte*)&bbuckets, sizeof bbuckets);
+ addroot((byte*)&addrhash, sizeof addrhash);
+ addroot((byte*)&addrfree, sizeof addrfree);
+}
+
+// Must match BlockProfileRecord in debug.go.
+typedef struct BRecord BRecord;
+struct BRecord {
+ int64 count;
+ int64 cycles;
+ uintptr stk[32];
+};
+
+func BlockProfile(p Slice) (n int, ok bool) {
+ Bucket *b;
+ BRecord *r;
+ int32 i;
+
+ runtime_lock(&proflock);
+ n = 0;
+ for(b=bbuckets; b; b=b->allnext)
+ n++;
+ ok = false;
+ if(n <= p.__count) {
+ ok = true;
+ r = (BRecord*)p.__values;
+ for(b=bbuckets; b; b=b->allnext, r++) {
+ r->count = b->count;
+ r->cycles = b->cycles;
+ for(i=0; (uintptr)i<b->nstk && (uintptr)i<nelem(r->stk); i++)
+ r->stk[i] = b->stk[i];
+ for(; (uintptr)i<nelem(r->stk); i++)
+ r->stk[i] = 0;
+ }
+ }
+ runtime_unlock(&proflock);
}
// Must match StackRecord in debug.go.
@@ -322,7 +411,7 @@
uintptr stk[32];
};
-func ThreadCreateProfile(p Slice) (n int32, ok bool) {
+func ThreadCreateProfile(p Slice) (n int, ok bool) {
TRecord *r;
M *first, *m;
@@ -341,7 +430,7 @@
}
}
-func Stack(b Slice, all bool) (n int32) {
+func Stack(b Slice, all bool) (n int) {
byte *pc, *sp;
bool enablegc;
@@ -378,7 +467,7 @@
runtime_m()->gcing = 0;
mstats.enablegc = enablegc;
runtime_semrelease(&runtime_worldsema);
- runtime_starttheworld(false);
+ runtime_starttheworld();
}
}
@@ -397,7 +486,7 @@
r->stk[n] = 0;
}
-func GoroutineProfile(b Slice) (n int32, ok bool) {
+func GoroutineProfile(b Slice) (n int, ok bool) {
TRecord *r;
G *gp;
@@ -423,7 +512,7 @@
runtime_m()->gcing = 0;
runtime_semrelease(&runtime_worldsema);
- runtime_starttheworld(false);
+ runtime_starttheworld();
}
}
@@ -0,0 +1,115 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "go-defer.h"
+
+// Code related to defer, panic and recover.
+
+uint32 runtime_panicking;
+static Lock paniclk;
+
+// Run all deferred functions for the current goroutine.
+static void
+rundefer(void)
+{
+ G *g;
+ Defer *d;
+
+ g = runtime_g();
+ while((d = g->defer) != nil) {
+ void (*pfn)(void*);
+
+ g->defer = d->__next;
+ pfn = d->__pfn;
+ d->__pfn = nil;
+ if (pfn != nil)
+ (*pfn)(d->__arg);
+ runtime_free(d);
+ }
+}
+
+void
+runtime_startpanic(void)
+{
+ M *m;
+
+ m = runtime_m();
+ if(m->dying) {
+ runtime_printf("panic during panic\n");
+ runtime_exit(3);
+ }
+ m->dying = 1;
+ runtime_xadd(&runtime_panicking, 1);
+ runtime_lock(&paniclk);
+}
+
+void
+runtime_dopanic(int32 unused __attribute__ ((unused)))
+{
+ G *g;
+ static bool didothers;
+
+ g = runtime_g();
+ if(g->sig != 0)
+ runtime_printf("[signal %x code=%p addr=%p]\n",
+ g->sig, (void*)g->sigcode0, (void*)g->sigcode1);
+
+ if(runtime_gotraceback()){
+ if(g != runtime_m()->g0) {
+ runtime_printf("\n");
+ runtime_goroutineheader(g);
+ runtime_traceback();
+ runtime_goroutinetrailer(g);
+ }
+ if(!didothers) {
+ didothers = true;
+ runtime_tracebackothers(g);
+ }
+ }
+ runtime_unlock(&paniclk);
+ if(runtime_xadd(&runtime_panicking, -1) != 0) {
+ // Some other m is panicking too.
+ // Let it print what it needs to print.
+ // Wait forever without chewing up cpu.
+ // It will exit when it's done.
+ static Lock deadlock;
+ runtime_lock(&deadlock);
+ runtime_lock(&deadlock);
+ }
+
+ runtime_exit(2);
+}
+
+void
+runtime_throw(const char *s)
+{
+ runtime_startpanic();
+ runtime_printf("throw: %s\n", s);
+ runtime_dopanic(0);
+ *(int32*)0 = 0; // not reached
+ runtime_exit(1); // even more not reached
+}
+
+void
+runtime_panicstring(const char *s)
+{
+ Eface err;
+
+ if(runtime_m()->gcing) {
+ runtime_printf("panic: %s\n", s);
+ runtime_throw("panic during gc");
+ }
+ runtime_newErrorString(runtime_gostringnocopy((const byte*)s), &err);
+ runtime_panic(err);
+}
+
+void runtime_Goexit (void) asm ("runtime.Goexit");
+
+void
+runtime_Goexit(void)
+{
+ rundefer();
+ runtime_goexit();
+}
@@ -0,0 +1,232 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Parallel for algorithm.
+
+#include "runtime.h"
+#include "arch.h"
+
+struct ParForThread
+{
+ // the thread's iteration space [32lsb, 32msb)
+ uint64 pos;
+ // stats
+ uint64 nsteal;
+ uint64 nstealcnt;
+ uint64 nprocyield;
+ uint64 nosyield;
+ uint64 nsleep;
+ byte pad[CacheLineSize];
+};
+
+ParFor*
+runtime_parforalloc(uint32 nthrmax)
+{
+ ParFor *desc;
+
+ // The ParFor object is followed by CacheLineSize padding
+ // and then nthrmax ParForThread.
+ desc = (ParFor*)runtime_malloc(sizeof(ParFor) + CacheLineSize + nthrmax * sizeof(ParForThread));
+ desc->thr = (ParForThread*)((byte*)(desc+1) + CacheLineSize);
+ desc->nthrmax = nthrmax;
+ return desc;
+}
+
+// For testing from Go
+// func parforalloc2(nthrmax uint32) *ParFor
+
+ParFor *runtime_parforalloc2(uint32)
+ asm("runtime.parforalloc2");
+
+ParFor *
+runtime_parforalloc2(uint32 nthrmax)
+{
+ return runtime_parforalloc(nthrmax);
+}
+
+void
+runtime_parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32))
+{
+ uint32 i, begin, end;
+
+ if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
+ runtime_printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
+ runtime_throw("parfor: invalid args");
+ }
+
+ desc->body = body;
+ desc->done = 0;
+ desc->nthr = nthr;
+ desc->thrseq = 0;
+ desc->cnt = n;
+ desc->ctx = ctx;
+ desc->wait = wait;
+ desc->nsteal = 0;
+ desc->nstealcnt = 0;
+ desc->nprocyield = 0;
+ desc->nosyield = 0;
+ desc->nsleep = 0;
+ for(i=0; i<nthr; i++) {
+ begin = (uint64)n*i / nthr;
+ end = (uint64)n*(i+1) / nthr;
+ desc->thr[i].pos = (uint64)begin | (((uint64)end)<<32);
+ }
+}
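
runtime_parforsetup above hands thread i the contiguous range [n*i/nthr, n*(i+1)/nthr) and packs begin/end into one uint64 (low 32 bits = begin, high 32 bits = end) so later updates can be a single atomic operation. A standalone sketch of that split and packing with small sample numbers:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
    uint32_t n = 10, nthr = 3;
    uint64_t pos[3];

    for (uint32_t i = 0; i < nthr; i++) {
        uint32_t begin = (uint64_t)n*i / nthr;
        uint32_t end   = (uint64_t)n*(i+1) / nthr;

        pos[i] = (uint64_t)begin | ((uint64_t)end << 32);
        printf("thread %u: [%u, %u) packed as %#llx\n",
               i, begin, end, (unsigned long long)pos[i]);
    }
    return 0;
}
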
+
+// For testing from Go
+// func parforsetup2(desc *ParFor, nthr, n uint32, ctx *byte, wait bool, body func(*ParFor, uint32))
+
+void runtime_parforsetup2(ParFor *, uint32, uint32, void *, bool, void *)
+ asm("runtime.parforsetup2");
+
+void
+runtime_parforsetup2(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void *body)
+{
+ runtime_parforsetup(desc, nthr, n, ctx, wait, (void(*)(ParFor*, uint32))body);
+}
+
+void
+runtime_parfordo(ParFor *desc)
+{
+ ParForThread *me;
+ uint32 tid, begin, end, begin2, try, victim, i;
+ uint64 *mypos, *victimpos, pos, newpos;
+ void (*body)(ParFor*, uint32);
+ bool idle;
+
+ // Obtain 0-based thread index.
+ tid = runtime_xadd(&desc->thrseq, 1) - 1;
+ if(tid >= desc->nthr) {
+ runtime_printf("tid=%d nthr=%d\n", tid, desc->nthr);
+ runtime_throw("parfor: invalid tid");
+ }
+
+ // If single-threaded, just execute the for serially.
+ if(desc->nthr==1) {
+ for(i=0; i<desc->cnt; i++)
+ desc->body(desc, i);
+ return;
+ }
+
+ body = desc->body;
+ me = &desc->thr[tid];
+ mypos = &me->pos;
+ for(;;) {
+ for(;;) {
+ // While there is local work,
+ // bump low index and execute the iteration.
+ pos = runtime_xadd64(mypos, 1);
+ begin = (uint32)pos-1;
+ end = (uint32)(pos>>32);
+ if(begin < end) {
+ body(desc, begin);
+ continue;
+ }
+ break;
+ }
+
+ // Out of work, need to steal something.
+ idle = false;
+ for(try=0;; try++) {
+ // If we don't see any work for long enough,
+ // increment the done counter...
+ if(try > desc->nthr*4 && !idle) {
+ idle = true;
+ runtime_xadd(&desc->done, 1);
+ }
+ // ...if all threads have incremented the counter,
+ // we are done.
+ if(desc->done + !idle == desc->nthr) {
+ if(!idle)
+ runtime_xadd(&desc->done, 1);
+ goto exit;
+ }
+ // Choose a random victim for stealing.
+ victim = runtime_fastrand1() % (desc->nthr-1);
+ if(victim >= tid)
+ victim++;
+ victimpos = &desc->thr[victim].pos;
+ pos = runtime_atomicload64(victimpos);
+ for(;;) {
+ // See if it has any work.
+ begin = (uint32)pos;
+ end = (uint32)(pos>>32);
+ if(begin >= end-1) {
+ begin = end = 0;
+ break;
+ }
+ if(idle) {
+ runtime_xadd(&desc->done, -1);
+ idle = false;
+ }
+ begin2 = begin + (end-begin)/2;
+ newpos = (uint64)begin | (uint64)begin2<<32;
+ if(runtime_cas64(victimpos, &pos, newpos)) {
+ begin = begin2;
+ break;
+ }
+ }
+ if(begin < end) {
+ // Has successfully stolen some work.
+ if(idle)
+ runtime_throw("parfor: should not be idle");
+ runtime_atomicstore64(mypos, (uint64)begin | (uint64)end<<32);
+ me->nsteal++;
+ me->nstealcnt += end-begin;
+ break;
+ }
+ // Backoff.
+ if(try < desc->nthr) {
+ // nothing
+ } else if (try < 4*desc->nthr) {
+ me->nprocyield++;
+ runtime_procyield(20);
+ // If a caller asked not to wait for the others, exit now
+ // (assume that most work is already done at this point).
+ } else if (!desc->wait) {
+ if(!idle)
+ runtime_xadd(&desc->done, 1);
+ goto exit;
+ } else if (try < 6*desc->nthr) {
+ me->nosyield++;
+ runtime_osyield();
+ } else {
+ me->nsleep++;
+ runtime_usleep(1);
+ }
+ }
+ }
+exit:
+ runtime_xadd64(&desc->nsteal, me->nsteal);
+ runtime_xadd64(&desc->nstealcnt, me->nstealcnt);
+ runtime_xadd64(&desc->nprocyield, me->nprocyield);
+ runtime_xadd64(&desc->nosyield, me->nosyield);
+ runtime_xadd64(&desc->nsleep, me->nsleep);
+ me->nsteal = 0;
+ me->nstealcnt = 0;
+ me->nprocyield = 0;
+ me->nosyield = 0;
+ me->nsleep = 0;
+}
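
The stealing step in runtime_parfordo above takes the top half of a victim's remaining range with a 64-bit CAS, leaving the bottom half to the victim. A standalone sketch of one such steal, using the same __atomic_compare_exchange_n builtin that the patch later maps runtime_cas64 onto; it runs single-threaded here, so the CAS always succeeds.

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
    /* victim currently owns iterations [2, 10) */
    uint64_t victimpos = (uint64_t)2 | ((uint64_t)10 << 32);

    uint64_t pos = __atomic_load_n(&victimpos, __ATOMIC_SEQ_CST);
    uint32_t begin = (uint32_t)pos;
    uint32_t end   = (uint32_t)(pos >> 32);

    uint32_t begin2 = begin + (end - begin)/2;              /* split point */
    uint64_t newpos = (uint64_t)begin | ((uint64_t)begin2 << 32);

    if (__atomic_compare_exchange_n(&victimpos, &pos, newpos, 1,
                                    __ATOMIC_SEQ_CST, __ATOMIC_RELAXED)) {
        printf("victim keeps [%u, %u), thief takes [%u, %u)\n",
               begin, begin2, begin2, end);
    }
    return 0;
}
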
+
+// For testing from Go
+// func parforiters(desc *ParFor, tid uintptr) (uintptr, uintptr)
+
+struct parforiters_ret {
+ uintptr start;
+ uintptr end;
+};
+
+struct parforiters_ret runtime_parforiters(ParFor *, uintptr)
+ asm("runtime.parforiters");
+
+struct parforiters_ret
+runtime_parforiters(ParFor *desc, uintptr tid)
+{
+ struct parforiters_ret ret;
+
+ ret.start = (uint32)desc->thr[tid].pos;
+ ret.end = (uint32)(desc->thr[tid].pos>>32);
+ return ret;
+}
@@ -156,15 +156,16 @@
int32 e, s, i, n;
float64 h;
- if(runtime_isNaN(v)) {
+ if(ISNAN(v)) {
gwrite("NaN", 3);
return;
}
- if(runtime_isInf(v, 1)) {
+ i = __builtin_isinf_sign(v);
+ if(i > 0) {
gwrite("+Inf", 4);
return;
}
- if(runtime_isInf(v, -1)) {
+ if(i < 0) {
gwrite("-Inf", 4);
return;
}
@@ -290,8 +291,8 @@
// extern uint32 runtime_maxstring;
// if(v.len > runtime_maxstring) {
- // gwrite("[invalid string]", 16);
- // return;
+ // gwrite("[string too long]", 17);
+ // return;
// }
if(v.__length > 0)
gwrite(v.__data, v.__length);
@@ -17,6 +17,7 @@
#include "arch.h"
#include "defs.h"
#include "malloc.h"
+#include "race.h"
#include "go-defer.h"
#ifdef USING_SPLIT_STACK
@@ -330,6 +331,9 @@
{
void (*fn)(void*);
+ if(g->traceback != nil)
+ gtraceback(g);
+
fn = (void (*)(void*))(g->entry);
fn(g->param);
runtime_goexit();
@@ -471,6 +475,9 @@
// Can not enable GC until all roots are registered.
// mstats.enablegc = 1;
m->nomemprof--;
+
+ if(raceenabled)
+ runtime_raceinit();
}
extern void main_init(void) __asm__ ("__go_init_main");
@@ -507,6 +514,8 @@
runtime_gosched();
main_main();
+ if(raceenabled)
+ runtime_racefini();
runtime_exit(0);
for(;;)
*(int32*)0 = 0;
@@ -540,11 +549,11 @@
}
void
-runtime_goroutineheader(G *g)
+runtime_goroutineheader(G *gp)
{
const char *status;
- switch(g->status) {
+ switch(gp->status) {
case Gidle:
status = "idle";
break;
@@ -558,8 +567,8 @@
status = "syscall";
break;
case Gwaiting:
- if(g->waitreason)
- status = g->waitreason;
+ if(gp->waitreason)
+ status = gp->waitreason;
else
status = "waiting";
break;
@@ -570,7 +579,7 @@
status = "???";
break;
}
- runtime_printf("goroutine %d [%s]:\n", g->goid, status);
+ runtime_printf("goroutine %d [%s]:\n", gp->goid, status);
}
void
@@ -598,15 +607,15 @@
void
runtime_tracebackothers(G * volatile me)
{
- G * volatile g;
+ G * volatile gp;
Traceback traceback;
traceback.gp = me;
- for(g = runtime_allg; g != nil; g = g->alllink) {
- if(g == me || g->status == Gdead)
+ for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
+ if(gp == me || gp->status == Gdead)
continue;
runtime_printf("\n");
- runtime_goroutineheader(g);
+ runtime_goroutineheader(gp);
// Our only mechanism for doing a stack trace is
// _Unwind_Backtrace. And that only works for the
@@ -616,25 +625,25 @@
// This means that if g is running or in a syscall, we
// can't reliably print a stack trace. FIXME.
- if(g->status == Gsyscall || g->status == Grunning) {
+ if(gp->status == Gsyscall || gp->status == Grunning) {
runtime_printf("no stack trace available\n");
- runtime_goroutinetrailer(g);
+ runtime_goroutinetrailer(gp);
continue;
}
- g->traceback = &traceback;
+ gp->traceback = &traceback;
#ifdef USING_SPLIT_STACK
__splitstack_getcontext(&me->stack_context[0]);
#endif
getcontext(&me->context);
- if(g->traceback != nil) {
- runtime_gogo(g);
+ if(gp->traceback != nil) {
+ runtime_gogo(gp);
}
runtime_printtrace(traceback.pcbuf, traceback.c);
- runtime_goroutinetrailer(g);
+ runtime_goroutinetrailer(gp);
}
}
@@ -666,22 +675,22 @@
}
static void
-mcommoninit(M *m)
+mcommoninit(M *mp)
{
- m->id = runtime_sched.mcount++;
- m->fastrand = 0x49f6428aUL + m->id + runtime_cputicks();
+ mp->id = runtime_sched.mcount++;
+ mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
- if(m->mcache == nil)
- m->mcache = runtime_allocmcache();
+ if(mp->mcache == nil)
+ mp->mcache = runtime_allocmcache();
- runtime_callers(1, m->createstack, nelem(m->createstack));
+ runtime_callers(1, mp->createstack, nelem(mp->createstack));
// Add to runtime_allm so garbage collector doesn't free m
// when it is just in a register or thread-local storage.
- m->alllink = runtime_allm;
+ mp->alllink = runtime_allm;
// runtime_NumCgoCall() iterates over allm w/o schedlock,
// so we need to publish it safely.
- runtime_atomicstorep(&runtime_allm, m);
+ runtime_atomicstorep(&runtime_allm, mp);
}
// Try to increment mcpu. Report whether succeeded.
@@ -701,34 +710,34 @@
// Put on `g' queue. Sched must be locked.
static void
-gput(G *g)
+gput(G *gp)
{
- M *m;
+ M *mp;
// If g is wired, hand it off directly.
- if((m = g->lockedm) != nil && canaddmcpu()) {
- mnextg(m, g);
+ if((mp = gp->lockedm) != nil && canaddmcpu()) {
+ mnextg(mp, gp);
return;
}
// If g is the idle goroutine for an m, hand it off.
- if(g->idlem != nil) {
- if(g->idlem->idleg != nil) {
+ if(gp->idlem != nil) {
+ if(gp->idlem->idleg != nil) {
runtime_printf("m%d idle out of sync: g%d g%d\n",
- g->idlem->id,
- g->idlem->idleg->goid, g->goid);
+ gp->idlem->id,
+ gp->idlem->idleg->goid, gp->goid);
runtime_throw("runtime: double idle");
}
- g->idlem->idleg = g;
+ gp->idlem->idleg = gp;
return;
}
- g->schedlink = nil;
+ gp->schedlink = nil;
if(runtime_sched.ghead == nil)
- runtime_sched.ghead = g;
+ runtime_sched.ghead = gp;
else
- runtime_sched.gtail->schedlink = g;
- runtime_sched.gtail = g;
+ runtime_sched.gtail->schedlink = gp;
+ runtime_sched.gtail = gp;
// increment gwait.
// if it transitions to nonzero, set atomic gwaiting bit.
@@ -747,11 +756,11 @@
static G*
gget(void)
{
- G *g;
+ G *gp;
- g = runtime_sched.ghead;
- if(g){
- runtime_sched.ghead = g->schedlink;
+ gp = runtime_sched.ghead;
+ if(gp) {
+ runtime_sched.ghead = gp->schedlink;
if(runtime_sched.ghead == nil)
runtime_sched.gtail = nil;
// decrement gwait.
@@ -759,45 +768,45 @@
if(--runtime_sched.gwait == 0)
runtime_xadd(&runtime_sched.atomic, -1<<gwaitingShift);
} else if(m->idleg != nil) {
- g = m->idleg;
+ gp = m->idleg;
m->idleg = nil;
}
- return g;
+ return gp;
}
// Put on `m' list. Sched must be locked.
static void
-mput(M *m)
+mput(M *mp)
{
- m->schedlink = runtime_sched.mhead;
- runtime_sched.mhead = m;
+ mp->schedlink = runtime_sched.mhead;
+ runtime_sched.mhead = mp;
runtime_sched.mwait++;
}
// Get an `m' to run `g'. Sched must be locked.
static M*
-mget(G *g)
+mget(G *gp)
{
- M *m;
+ M *mp;
// if g has its own m, use it.
- if(g && (m = g->lockedm) != nil)
- return m;
+ if(gp && (mp = gp->lockedm) != nil)
+ return mp;
// otherwise use general m pool.
- if((m = runtime_sched.mhead) != nil){
- runtime_sched.mhead = m->schedlink;
+ if((mp = runtime_sched.mhead) != nil) {
+ runtime_sched.mhead = mp->schedlink;
runtime_sched.mwait--;
}
- return m;
+ return mp;
}
// Mark g ready to run.
void
-runtime_ready(G *g)
+runtime_ready(G *gp)
{
schedlock();
- readylocked(g);
+ readylocked(gp);
schedunlock();
}
@@ -805,23 +814,23 @@
// G might be running already and about to stop.
// The sched lock protects g->status from changing underfoot.
static void
-readylocked(G *g)
+readylocked(G *gp)
{
- if(g->m){
+ if(gp->m) {
// Running on another machine.
// Ready it when it stops.
- g->readyonstop = 1;
+ gp->readyonstop = 1;
return;
}
// Mark runnable.
- if(g->status == Grunnable || g->status == Grunning) {
- runtime_printf("goroutine %d has status %d\n", g->goid, g->status);
+ if(gp->status == Grunnable || gp->status == Grunning) {
+ runtime_printf("goroutine %d has status %d\n", gp->goid, gp->status);
runtime_throw("bad g->status in ready");
}
- g->status = Grunnable;
+ gp->status = Grunnable;
- gput(g);
+ gput(gp);
matchmg();
}
@@ -829,23 +838,23 @@
// debuggers can set a breakpoint here and catch all
// new goroutines.
static void
-newprocreadylocked(G *g)
+newprocreadylocked(G *gp)
{
- readylocked(g);
+ readylocked(gp);
}
// Pass g to m for running.
// Caller has already incremented mcpu.
static void
-mnextg(M *m, G *g)
+mnextg(M *mp, G *gp)
{
runtime_sched.grunning++;
- m->nextg = g;
- if(m->waitnextg) {
- m->waitnextg = 0;
+ mp->nextg = gp;
+ if(mp->waitnextg) {
+ mp->waitnextg = 0;
if(mwakeup != nil)
runtime_notewakeup(&mwakeup->havenextg);
- mwakeup = m;
+ mwakeup = mp;
}
}
@@ -969,35 +978,38 @@
}
int32
-runtime_helpgc(bool *extra)
+runtime_gcprocs(void)
+{
+ int32 n;
+
+ // Figure out how many CPUs to use during GC.
+ // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
+ n = runtime_gomaxprocs;
+ if(n > runtime_ncpu)
+ n = runtime_ncpu > 0 ? runtime_ncpu : 1;
+ if(n > MaxGcproc)
+ n = MaxGcproc;
+ if(n > runtime_sched.mwait+1) // one M is currently running
+ n = runtime_sched.mwait+1;
+ return n;
+}
+
+void
+runtime_helpgc(int32 nproc)
{
M *mp;
- int32 n, max;
-
- // Figure out how many CPUs to use.
- // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
- max = runtime_gomaxprocs;
- if(max > runtime_ncpu)
- max = runtime_ncpu > 0 ? runtime_ncpu : 1;
- if(max > MaxGcproc)
- max = MaxGcproc;
-
- // We're going to use one CPU no matter what.
- // Figure out the max number of additional CPUs.
- max--;
+ int32 n;
runtime_lock(&runtime_sched);
- n = 0;
- while(n < max && (mp = mget(nil)) != nil) {
- n++;
+ for(n = 1; n < nproc; n++) { // one M is currently running
+ mp = mget(nil);
+ if(mp == nil)
+ runtime_throw("runtime_gcprocs inconsistency");
mp->helpgc = 1;
mp->waitnextg = 0;
runtime_notewakeup(&mp->havenextg);
}
runtime_unlock(&runtime_sched);
- if(extra)
- *extra = n != max;
- return n;
}
void
@@ -1037,26 +1049,38 @@
}
void
-runtime_starttheworld(bool extra)
+runtime_starttheworld(void)
{
- M *m;
+ M *mp;
+ int32 max;
+
+ // Figure out how many CPUs GC could possibly use.
+ max = runtime_gomaxprocs;
+ if(max > runtime_ncpu)
+ max = runtime_ncpu > 0 ? runtime_ncpu : 1;
+ if(max > MaxGcproc)
+ max = MaxGcproc;
schedlock();
runtime_gcwaiting = 0;
setmcpumax(runtime_gomaxprocs);
matchmg();
- if(extra && canaddmcpu()) {
- // Start a new m that will (we hope) be idle
- // and so available to help when the next
- // garbage collection happens.
+ if(runtime_gcprocs() < max && canaddmcpu()) {
+ // If GC could have used another helper proc, start one now,
+ // in the hope that it will be available next time.
+ // It would have been even better to start it before the collection,
+ // but doing so requires allocating memory, so it's tricky to
+ // coordinate. This lazy approach works out in practice:
+ // we don't mind if the first couple gc rounds don't have quite
+ // the maximum number of procs.
// canaddmcpu above did mcpu++
// (necessary, because m will be doing various
// initialization work so is definitely running),
// but m is not running a specific goroutine,
// so set the helpgc flag as a signal to m's
// first schedule(nil) to mcpu-- and grunning--.
- m = runtime_newm();
- m->helpgc = 1;
+ mp = runtime_newm();
+ mp->helpgc = 1;
runtime_sched.grunning++;
}
schedunlock();
@@ -1110,6 +1134,11 @@
runtime_initsig();
schedule(nil);
+
+ // TODO(brainman): This point is never reached, because scheduler
+ // does not release os threads at the moment. But once this path
+ // is enabled, we must remove our seh here.
+
return nil;
}
@@ -1148,14 +1177,14 @@
M*
runtime_newm(void)
{
- M *m;
+ M *mp;
pthread_attr_t attr;
pthread_t tid;
size_t stacksize;
- m = runtime_malloc(sizeof(M));
- mcommoninit(m);
- m->g0 = runtime_malg(-1, nil, nil);
+ mp = runtime_malloc(sizeof(M));
+ mcommoninit(mp);
+ mp->g0 = runtime_malg(-1, nil, nil);
if(pthread_attr_init(&attr) != 0)
runtime_throw("pthread_attr_init");
@@ -1175,10 +1204,10 @@
if(pthread_attr_setstacksize(&attr, stacksize) != 0)
runtime_throw("pthread_attr_setstacksize");
- if(pthread_create(&tid, &attr, runtime_mstart, m) != 0)
+ if(pthread_create(&tid, &attr, runtime_mstart, mp) != 0)
runtime_throw("pthread_create");
- return m;
+ return mp;
}
// One round of scheduler: find a goroutine and run it.
@@ -1202,7 +1231,7 @@
if(atomic_mcpu(v) > maxgomaxprocs)
runtime_throw("negative mcpu in scheduler");
- switch(gp->status){
+ switch(gp->status) {
case Grunnable:
case Gdead:
// Shouldn't have been running!
@@ -1212,6 +1241,8 @@
gput(gp);
break;
case Gmoribund:
+ if(raceenabled)
+ runtime_racegoend(gp->goid);
gp->status = Gdead;
if(gp->lockedm) {
gp->lockedm = nil;
@@ -1224,7 +1255,7 @@
runtime_exit(0);
break;
}
- if(gp->readyonstop){
+ if(gp->readyonstop) {
gp->readyonstop = 0;
readylocked(gp);
}
@@ -1272,6 +1303,18 @@
runtime_mcall(schedule);
}
+// Puts the current goroutine into a waiting state and unlocks the lock.
+// The goroutine can be made runnable again by calling runtime_ready(gp).
+void
+runtime_park(void (*unlockf)(Lock*), Lock *lock, const char *reason)
+{
+ g->status = Gwaiting;
+ g->waitreason = reason;
+ if(unlockf)
+ unlockf(lock);
+ runtime_gosched();
+}
+
// The goroutine g is about to enter a system call.
// Record that it's not using the cpu anymore.
// This is called only from the go syscall library and cgocall,
@@ -1448,10 +1491,15 @@
byte *sp;
size_t spsize;
G *newg;
+ int32 goid;
+
+ goid = runtime_xadd((uint32*)&runtime_sched.goidgen, 1);
+ if(raceenabled)
+ runtime_racegostart(goid, runtime_getcallerpc(&fn));
schedlock();
- if((newg = gfget()) != nil){
+ if((newg = gfget()) != nil) {
#ifdef USING_SPLIT_STACK
int dont_block_signals = 0;
@@ -1482,8 +1530,7 @@
newg->gopc = (uintptr)__builtin_return_address(0);
runtime_sched.gcount++;
- runtime_sched.goidgen++;
- newg->goid = runtime_sched.goidgen;
+ newg->goid = goid;
if(sp == nil)
runtime_throw("nil g->stack0");
@@ -1512,49 +1559,22 @@
// Put on gfree list. Sched must be locked.
static void
-gfput(G *g)
+gfput(G *gp)
{
- g->schedlink = runtime_sched.gfree;
- runtime_sched.gfree = g;
+ gp->schedlink = runtime_sched.gfree;
+ runtime_sched.gfree = gp;
}
// Get from gfree list. Sched must be locked.
static G*
gfget(void)
{
- G *g;
+ G *gp;
- g = runtime_sched.gfree;
- if(g)
- runtime_sched.gfree = g->schedlink;
- return g;
-}
-
-// Run all deferred functions for the current goroutine.
-static void
-rundefer(void)
-{
- Defer *d;
-
- while((d = g->defer) != nil) {
- void (*pfn)(void*);
-
- pfn = d->__pfn;
- d->__pfn = nil;
- if (pfn != nil)
- (*pfn)(d->__arg);
- g->defer = d->__next;
- runtime_free(d);
- }
-}
-
-void runtime_Goexit (void) asm ("runtime.Goexit");
-
-void
-runtime_Goexit(void)
-{
- rundefer();
- runtime_goexit();
+ gp = runtime_sched.gfree;
+ if(gp)
+ runtime_sched.gfree = gp->schedlink;
+ return gp;
}
void runtime_Gosched (void) asm ("runtime.Gosched");
@@ -1651,10 +1671,10 @@
return m->id;
}
-int32 runtime_NumGoroutine (void)
+intgo runtime_NumGoroutine (void)
__asm__ ("runtime.NumGoroutine");
-int32
+intgo
runtime_NumGoroutine()
{
return runtime_sched.gcount;
@@ -0,0 +1,30 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Definitions related to data race detection.
+
+#ifdef RACE
+enum { raceenabled = 1 };
+#else
+enum { raceenabled = 0 };
+#endif
+
+// Initialize race detection subsystem.
+void runtime_raceinit(void);
+// Finalize race detection subsystem, does not return.
+void runtime_racefini(void);
+
+void runtime_racemalloc(void *p, uintptr sz, void *pc);
+void runtime_racefree(void *p);
+void runtime_racegostart(int32 goid, void *pc);
+void runtime_racegoend(int32 goid);
+void runtime_racewritepc(void *addr, void *pc);
+void runtime_racereadpc(void *addr, void *pc);
+void runtime_racefingo(void);
+void runtime_raceacquire(void *addr);
+void runtime_raceacquireg(G *gp, void *addr);
+void runtime_racerelease(void *addr);
+void runtime_racereleaseg(G *gp, void *addr);
+void runtime_racereleasemerge(void *addr);
+void runtime_racereleasemergeg(G *gp, void *addr);
@@ -4,13 +4,13 @@
#include <unistd.h>
+#include "config.h"
+
#include "runtime.h"
#include "array.h"
#include "go-panic.h"
#include "go-string.h"
-uint32 runtime_panicking;
-
int32
runtime_gotraceback(void)
{
@@ -22,84 +22,6 @@
return runtime_atoi(p);
}
-static Lock paniclk;
-
-void
-runtime_startpanic(void)
-{
- M *m;
-
- m = runtime_m();
- if(m->dying) {
- runtime_printf("panic during panic\n");
- runtime_exit(3);
- }
- m->dying = 1;
- runtime_xadd(&runtime_panicking, 1);
- runtime_lock(&paniclk);
-}
-
-void
-runtime_dopanic(int32 unused __attribute__ ((unused)))
-{
- G* g;
- static bool didothers;
-
- g = runtime_g();
- if(g->sig != 0)
- runtime_printf("[signal %x code=%p addr=%p]\n",
- g->sig, (void*)(g->sigcode0), (void*)(g->sigcode1));
-
- if(runtime_gotraceback()){
- if(g != runtime_m()->g0) {
- runtime_printf("\n");
- runtime_goroutineheader(g);
- runtime_traceback();
- runtime_goroutinetrailer(g);
- }
- if(!didothers) {
- didothers = true;
- runtime_tracebackothers(g);
- }
- }
-
- runtime_unlock(&paniclk);
- if(runtime_xadd(&runtime_panicking, -1) != 0) {
- // Some other m is panicking too.
- // Let it print what it needs to print.
- // Wait forever without chewing up cpu.
- // It will exit when it's done.
- static Lock deadlock;
- runtime_lock(&deadlock);
- runtime_lock(&deadlock);
- }
-
- runtime_exit(2);
-}
-
-void
-runtime_throw(const char *s)
-{
- runtime_startpanic();
- runtime_printf("throw: %s\n", s);
- runtime_dopanic(0);
- *(int32*)0 = 0; // not reached
- runtime_exit(1); // even more not reached
-}
-
-void
-runtime_panicstring(const char *s)
-{
- Eface err;
-
- if(runtime_m()->gcing) {
- runtime_printf("panic: %s\n", s);
- runtime_throw("panic during gc");
- }
- runtime_newErrorString(runtime_gostringnocopy((const byte*)s), &err);
- runtime_panic(err);
-}
-
static int32 argc;
static byte** argv;
@@ -247,14 +169,41 @@
return traceback > 1 || (s != nil && __builtin_strchr((const char*)s, '.') != nil && __builtin_memcmp(s, "runtime.", 7) != 0);
}
-bool
-runtime_isInf(float64 f, int32 sign)
+static Lock ticksLock;
+static int64 ticks;
+
+int64
+runtime_tickspersecond(void)
{
- if(!__builtin_isinf(f))
- return false;
- if(sign == 0)
- return true;
- if(sign > 0)
- return f > 0;
- return f < 0;
+ int64 res, t0, t1, c0, c1;
+
+ res = (int64)runtime_atomicload64((uint64*)&ticks);
+ if(res != 0)
+ return ticks;
+ runtime_lock(&ticksLock);
+ res = ticks;
+ if(res == 0) {
+ t0 = runtime_nanotime();
+ c0 = runtime_cputicks();
+ runtime_usleep(100*1000);
+ t1 = runtime_nanotime();
+ c1 = runtime_cputicks();
+ if(t1 == t0)
+ t1++;
+ res = (c1-c0)*1000*1000*1000/(t1-t0);
+ if(res == 0)
+ res++;
+ runtime_atomicstore64((uint64*)&ticks, res);
+ }
+ runtime_unlock(&ticksLock);
+ return res;
}
+
+int64 runtime_pprof_runtime_cyclesPerSecond(void)
+ asm("runtime_pprof.runtime_cyclesPerSecond");
+
+int64
+runtime_pprof_runtime_cyclesPerSecond(void)
+{
+ return runtime_tickspersecond();
+}
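
runtime_tickspersecond above calibrates the CPU tick counter against the nanosecond clock over a roughly 100ms window. A standalone sketch of just the arithmetic, with made-up sample values standing in for runtime_cputicks and runtime_nanotime:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
    int64_t t0 = 0,             c0 = 0;               /* first sample */
    int64_t t1 = 100*1000*1000, c1 = 250*1000*1000;   /* ~100ms and 250M ticks later */

    if (t1 == t0)
        t1++;                                  /* avoid dividing by zero */
    int64_t res = (c1 - c0) * 1000 * 1000 * 1000 / (t1 - t0);
    if (res == 0)
        res++;

    printf("estimated %lld ticks per second (%.1f GHz)\n",
           (long long)res, res / 1e9);
    return 0;
}
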
@@ -1,8 +1,6 @@
-/* runtime.h -- runtime support for Go.
-
- Copyright 2009 The Go Authors. All rights reserved.
- Use of this source code is governed by a BSD-style
- license that can be found in the LICENSE file. */
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
#include "config.h"
@@ -42,8 +40,12 @@
typedef unsigned int uint64 __attribute__ ((mode (DI)));
typedef float float32 __attribute__ ((mode (SF)));
typedef double float64 __attribute__ ((mode (DF)));
+typedef signed int intptr __attribute__ ((mode (pointer)));
typedef unsigned int uintptr __attribute__ ((mode (pointer)));
+typedef int intgo; // Go's int
+typedef unsigned int uintgo; // Go's uint
+
/* Defined types. */
typedef uint8 bool;
@@ -59,6 +61,10 @@
typedef struct Hchan Hchan;
typedef struct Timers Timers;
typedef struct Timer Timer;
+typedef struct GCStats GCStats;
+typedef struct LFNode LFNode;
+typedef struct ParFor ParFor;
+typedef struct ParForThread ParForThread;
typedef struct __go_open_array Slice;
typedef struct __go_string String;
@@ -105,6 +111,10 @@
true = 1,
false = 0,
};
+enum
+{
+ PtrSize = sizeof(void*),
+};
/*
* structures
@@ -119,6 +129,16 @@
uint32 key; // futex-based impl
M* waitm; // waiting M (sema-based impl)
};
+struct GCStats
+{
+ // the struct must consist of only uint64's,
+ // because it is cast to uint64[].
+ uint64 nhandoff;
+ uint64 nhandoffcnt;
+ uint64 nprocyield;
+ uint64 nosyield;
+ uint64 nsleep;
+};
struct G
{
Defer* defer;
@@ -142,6 +162,7 @@
G* schedlink;
bool readyonstop;
bool ispanic;
+ int8 raceignore; // ignore race detection events
M* m; // for debuggers, but offset not hard-coded
M* lockedm;
M* idlem;
@@ -190,6 +211,14 @@
uintptr waitsema; // semaphore for parking on locks
uint32 waitsemacount;
uint32 waitsemalock;
+ GCStats gcstats;
+ bool racecall;
+ void* racepc;
+
+ uintptr settype_buf[1024];
+ uintptr settype_bufsize;
+
+ uintptr end[];
};
struct SigTab
@@ -218,7 +247,6 @@
uintptr entry; // entry pc
};
-/* Macros. */
#ifdef GOOS_windows
enum {
@@ -257,6 +285,34 @@
Eface arg;
};
+// Lock-free stack node.
+struct LFNode
+{
+ LFNode *next;
+ uintptr pushcnt;
+};
+
+// Parallel for descriptor.
+struct ParFor
+{
+ void (*body)(ParFor*, uint32); // executed for each element
+ uint32 done; // number of idle threads
+ uint32 nthr; // total number of threads
+ uint32 nthrmax; // maximum number of threads
+ uint32 thrseq; // thread id sequencer
+ uint32 cnt; // iteration space [0, cnt)
+ void *ctx; // arbitrary user context
+ bool wait; // if true, wait while all threads finish processing,
+ // otherwise parfor may return while other threads are still working
+ ParForThread *thr; // array of thread descriptors
+ // stats
+ uint64 nsteal;
+ uint64 nstealcnt;
+ uint64 nprocyield;
+ uint64 nosyield;
+ uint64 nsleep;
+};
+
/*
* defined macros
* you need super-gopher-guru privilege
@@ -265,6 +321,7 @@
#define nelem(x) (sizeof(x)/sizeof((x)[0]))
#define nil ((void*)0)
#define USED(v) ((void) v)
+#define ROUND(x, n) (((x)+(n)-1)&~((n)-1)) /* all-caps to mark as macro: it evaluates n twice */
/*
* external data
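
The ROUND macro rounds x up to the next multiple of n, where n must be a power of two: ROUND(13, 8) expands to ((13)+(8)-1)&~((8)-1), i.e. 20 & ~7 = 16. As its comment warns, n appears twice in the expansion, so passing an expression with side effects for n would evaluate it twice.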
@@ -312,7 +369,8 @@
void runtime_minit(void);
void runtime_mallocinit(void);
void runtime_gosched(void);
-void runtime_tsleep(int64);
+void runtime_park(void(*)(Lock*), Lock*, const char*);
+void runtime_tsleep(int64, const char*);
M* runtime_newm(void);
void runtime_goexit(void);
void runtime_entersyscall(void) __asm__("syscall.Entersyscall");
@@ -322,9 +380,12 @@
int32 runtime_callers(int32, uintptr*, int32);
int64 runtime_nanotime(void);
int64 runtime_cputicks(void);
+int64 runtime_tickspersecond(void);
+void runtime_blockevent(int64, int32);
+extern int64 runtime_blockprofilerate;
void runtime_stoptheworld(void);
-void runtime_starttheworld(bool);
+void runtime_starttheworld(void);
extern uint32 runtime_worldsema;
G* __go_go(void (*pfn)(void*), void*);
@@ -372,6 +433,28 @@
void runtime_futexwakeup(uint32*, uint32);
/*
+ * Lock-free stack.
+ * Initialize uint64 head to 0, compare with 0 to test for emptiness.
+ * The stack does not keep pointers to nodes,
+ * so they can be garbage collected if there are no other pointers to nodes.
+ */
+void runtime_lfstackpush(uint64 *head, LFNode *node)
+ asm("runtime.lfstackpush");
+LFNode* runtime_lfstackpop(uint64 *head);
+
+/*
+ * Parallel for over [0, n).
+ * body() is executed for each iteration.
+ * nthr - total number of worker threads.
+ * ctx - arbitrary user context.
+ * if wait=true, threads return from parfor() when all work is done;
+ * otherwise, threads can return while other threads are still finishing processing.
+ */
+ParFor* runtime_parforalloc(uint32 nthrmax);
+void runtime_parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32));
+void runtime_parfordo(ParFor *desc) asm("runtime.parfordo");
+
+/*
* low level C-called
*/
#define runtime_mmap mmap
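
The two new facilities declared above are self-contained: a lock-free stack is just a uint64 head initialized to 0 (push with runtime_lfstackpush(&head, node), pop with runtime_lfstackpop(&head), which returns nil when the stack is empty), and a parallel-for descriptor is set up once and then driven by every participating thread. A hedged sketch of the parfor calling convention, using illustrative names that are not part of the patch:

static void
example_body(ParFor *desc, uint32 i)
{
	// Process element i; the user context passed to runtime_parforsetup
	// is available through desc->ctx.
	((uint64*)desc->ctx)[i]++;
}

static void
example_parallel_increment(uint64 *counts, uint32 n, uint32 nthr)
{
	ParFor *desc;

	desc = runtime_parforalloc(nthr);
	runtime_parforsetup(desc, nthr, n, counts, true, example_body);
	// Each of the nthr worker threads (including this one) is expected to
	// call runtime_parfordo(desc); with wait=true it returns only after
	// the whole iteration space [0, n) has been processed.
	runtime_parfordo(desc);
}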
@@ -432,12 +515,17 @@
void free(void *v);
#define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
#define runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
+#define runtime_cas64(pval, pold, new) __atomic_compare_exchange_n (pval, pold, new, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED)
#define runtime_xadd(p, v) __sync_add_and_fetch (p, v)
+#define runtime_xadd64(p, v) __sync_add_and_fetch (p, v)
#define runtime_xchg(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST)
#define runtime_atomicload(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
#define runtime_atomicstore(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
#define runtime_atomicloadp(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
#define runtime_atomicstorep(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
+#define runtime_atomicload64(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
+#define runtime_atomicstore64(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
+#define PREFETCH(p) __builtin_prefetch(p)
struct __go_func_type;
bool runtime_addfinalizer(void*, void(*fn)(void*), const struct __go_func_type *);
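
Note the asymmetry in the compare-and-swap macros: runtime_cas passes the expected value directly to __sync_bool_compare_and_swap, while the new runtime_cas64 maps onto __atomic_compare_exchange_n and therefore takes a pointer to the expected value, which is overwritten with the currently stored value when the exchange fails. A small sketch (hypothetical helper, not part of the patch):

static bool
example_try_increment64(uint64 *p)
{
	uint64 old;

	old = runtime_atomicload64(p);
	// A single attempt: on failure, old now holds the value observed in
	// *p; a caller wanting an unconditional increment would loop, or
	// simply use runtime_xadd64(p, 1).
	return runtime_cas64(p, &old, old+1);
}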
@@ -469,8 +557,7 @@
/*
* wrapped for go users
*/
-bool runtime_isInf(float64 f, int32 sign);
-#define runtime_isNaN(f) __builtin_isnan(f)
+#define ISNAN(f) __builtin_isnan(f)
void runtime_semacquire(uint32 volatile *);
void runtime_semrelease(uint32 volatile *);
int32 runtime_gomaxprocsfunc(int32 n);
@@ -493,8 +580,13 @@
// This is a no-op on other systems.
void runtime_setprof(bool);
-void runtime_time_scan(void (*)(byte*, int64));
-void runtime_trampoline_scan(void (*)(byte *, int64));
+enum
+{
+ UseSpanType = 1,
+};
+
+void runtime_time_scan(void (*)(byte*, uintptr));
+void runtime_trampoline_scan(void (*)(byte *, uintptr));
void runtime_setsig(int32, bool, bool);
#define runtime_setitimer setitimer
@@ -5,10 +5,10 @@
package runtime
#include "runtime.h"
-func GOMAXPROCS(n int32) (ret int32) {
+func GOMAXPROCS(n int) (ret int) {
ret = runtime_gomaxprocsfunc(n);
}
-func NumCPU() (ret int32) {
+func NumCPU() (ret int) {
ret = runtime_ncpu;
}
@@ -24,30 +24,32 @@
typedef struct Sema Sema;
struct Sema
{
- uint32 volatile *addr;
- G *g;
- Sema *prev;
- Sema *next;
+ uint32 volatile* addr;
+ G* g;
+ int64 releasetime;
+ Sema* prev;
+ Sema* next;
};
typedef struct SemaRoot SemaRoot;
struct SemaRoot
{
- Lock;
- Sema *head;
- Sema *tail;
+ Lock;
+ Sema* head;
+ Sema* tail;
// Number of waiters. Read w/o the lock.
- uint32 volatile nwait;
+ uint32 volatile nwait;
};
// Prime to not correlate with any user patterns.
#define SEMTABLESZ 251
-static union
+union semtable
{
SemaRoot;
uint8 pad[CacheLineSize];
-} semtable[SEMTABLESZ];
+};
+static union semtable semtable[SEMTABLESZ];
static SemaRoot*
semroot(uint32 volatile *addr)
@@ -95,13 +97,13 @@
return 0;
}
-void
-runtime_semacquire(uint32 volatile *addr)
+static void
+semacquireimpl(uint32 volatile *addr, int32 profile)
{
- G *g;
- Sema s;
+ Sema s; // Needs to be allocated on stack, otherwise garbage collector could deallocate it
SemaRoot *root;
-
+ int64 t0;
+
// Easy case.
if(cansemacquire(addr))
return;
@@ -112,8 +114,13 @@
// enqueue itself as a waiter
// sleep
// (waiter descriptor is dequeued by signaler)
- g = runtime_g();
root = semroot(addr);
+ t0 = 0;
+ s.releasetime = 0;
+ if(profile && runtime_blockprofilerate > 0) {
+ t0 = runtime_cputicks();
+ s.releasetime = -1;
+ }
for(;;) {
runtime_lock(root);
@@ -128,16 +135,22 @@
// Any semrelease after the cansemacquire knows we're waiting
// (we set nwait above), so go to sleep.
semqueue(root, addr, &s);
- g->status = Gwaiting;
- g->waitreason = "semacquire";
- runtime_unlock(root);
- runtime_gosched();
- if(cansemacquire(addr))
+ runtime_park(runtime_unlock, root, "semacquire");
+ if(cansemacquire(addr)) {
+ if(t0)
+ runtime_blockevent(s.releasetime - t0, 3);
return;
+ }
}
}
void
+runtime_semacquire(uint32 volatile *addr)
+{
+ semacquireimpl(addr, 0);
+}
+
+void
runtime_semrelease(uint32 volatile *addr)
{
Sema *s;
@@ -168,12 +181,15 @@
}
}
runtime_unlock(root);
- if(s)
+ if(s) {
+ if(s->releasetime)
+ s->releasetime = runtime_cputicks();
runtime_ready(s->g);
+ }
}
func runtime_Semacquire(addr *uint32) {
- runtime_semacquire(addr);
+ semacquireimpl(addr, 1);
}
func runtime_Semrelease(addr *uint32) {
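
The semaphore code above is the first user of the new runtime_park entry point, replacing the old pattern of setting g->status and g->waitreason by hand, unlocking, and calling runtime_gosched. The convention is: take the lock, queue the current G on a structure guarded by it, then hand the lock and its unlock function to runtime_park, which records the wait reason, releases the lock, and deschedules the goroutine until another thread calls runtime_ready on it. A sketch of that shape, with illustrative names only:

static Lock examplelock;

static void
example_wait(void)
{
	runtime_lock(&examplelock);
	// ... enqueue runtime_g() on a wait list protected by examplelock ...
	runtime_park(runtime_unlock, &examplelock, "example wait");
	// Execution resumes here only after another goroutine has dequeued
	// this G and called runtime_ready on it.
}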
@@ -32,8 +32,8 @@
Runeself = 0x80,
};
-func stringiter(s String, k int32) (retk int32) {
- int32 l, n;
+func stringiter(s String, k int) (retk int) {
+ int32 l;
if(k >= s.__length) {
// retk=0 is end of iteration
@@ -48,15 +48,12 @@
}
// multi-char rune
- n = charntorune(&l, s.__data+k, s.__length-k);
- retk = k + (n ? n : 1);
+ retk = k + charntorune(&l, s.__data+k, s.__length-k);
out:
}
-func stringiter2(s String, k int32) (retk int32, retv int32) {
- int32 n;
-
+func stringiter2(s String, k int) (retk int, retv int) {
if(k >= s.__length) {
// retk=0 is end of iteration
retk = 0;
@@ -71,8 +68,7 @@
}
// multi-char rune
- n = charntorune(&retv, s.__data+k, s.__length-k);
- retk = k + (n ? n : 1);
+ retk = k + charntorune(&retv, s.__data+k, s.__length-k);
out:
}
@@ -10,6 +10,7 @@
#include "defs.h"
#include "arch.h"
#include "malloc.h"
+#include "race.h"
static Timers timers;
static void addtimer(Timer*);
@@ -22,17 +23,16 @@
// Sleep puts the current goroutine to sleep for at least ns nanoseconds.
func Sleep(ns int64) {
- G *g;
-
- g = runtime_g();
- g->status = Gwaiting;
- g->waitreason = "sleep";
- runtime_tsleep(ns);
+ runtime_tsleep(ns, "sleep");
}
// startTimer adds t to the timer heap.
func startTimer(t *Timer) {
+ if(raceenabled)
+ runtime_racerelease(t);
+ runtime_lock(&timers);
addtimer(t);
+ runtime_unlock(&timers);
}
// stopTimer removes t from the timer heap if it is there.
@@ -57,27 +57,24 @@
}
// Put the current goroutine to sleep for ns nanoseconds.
-// The caller must have set g->status and g->waitreason.
void
-runtime_tsleep(int64 ns)
+runtime_tsleep(int64 ns, const char *reason)
{
G* g;
Timer t;
g = runtime_g();
- if(ns <= 0) {
- g->status = Grunning;
- g->waitreason = nil;
+ if(ns <= 0)
return;
- }
t.when = runtime_nanotime() + ns;
t.period = 0;
t.f = ready;
t.arg.__object = g;
+ runtime_lock(&timers);
addtimer(&t);
- runtime_gosched();
+ runtime_park(runtime_unlock, &timers, reason);
}
// Add a timer to the heap and start or kick the timer proc
@@ -88,7 +85,6 @@
int32 n;
Timer **nt;
- runtime_lock(&timers);
if(timers.len >= timers.cap) {
// Grow slice.
n = 16;
@@ -116,7 +112,6 @@
}
if(timers.timerproc == nil)
timers.timerproc = __go_go(timerproc, nil);
- runtime_unlock(&timers);
}
// Delete timer t from the heap.
@@ -159,13 +154,11 @@
static void
timerproc(void* dummy __attribute__ ((unused)))
{
- G *g;
int64 delta, now;
Timer *t;
void (*f)(int64, Eface);
Eface arg;
- g = runtime_g();
for(;;) {
runtime_lock(&timers);
now = runtime_nanotime();
@@ -192,16 +185,15 @@
f = t->f;
arg = t->arg;
runtime_unlock(&timers);
+ if(raceenabled)
+ runtime_raceacquire(t);
f(now, arg);
runtime_lock(&timers);
}
if(delta < 0) {
// No timers left - put goroutine to sleep.
timers.rescheduling = true;
- g->status = Gwaiting;
- g->waitreason = "timer goroutine (idle)";
- runtime_unlock(&timers);
- runtime_gosched();
+ runtime_park(runtime_unlock, &timers, "timer goroutine (idle)");
continue;
}
// At least one timer pending. Sleep until then.
@@ -263,7 +255,7 @@
}
void
-runtime_time_scan(void (*scan)(byte*, int64))
+runtime_time_scan(void (*addroot)(byte*, uintptr))
{
- scan((byte*)&timers, sizeof timers);
+ addroot((byte*)&timers, sizeof timers);
}
@@ -346,6 +346,11 @@
# They all compile; now generate the code to call them.
+testname() {
+ # Remove the package from the name used with the -test option.
+ echo $1 | sed 's/^.*\.//'
+}
+
localname() {
# The package main has been renamed to __main__ when imported.
# Adjust its uses.
@@ -373,7 +378,7 @@
fi
# benchmarks are named BenchmarkFoo.
pattern='Benchmark([^a-z].*)?'
- benchmarks=$($NM -p -v _gotest_.o $xofile | egrep " $test .*\."$pattern'$' | grep -v '\..*\..*\.' | fgrep -v '$' | fgrep -v ' __go_' | sed 's/.* //' | $symtogo)
+ benchmarks=$($NM -p -v _gotest_.o $xofile | egrep " $text .*\."$pattern'$' | grep -v '\..*\..*\.' | fgrep -v '$' | fgrep -v ' __go_' | sed 's/.* //' | $symtogo)
# examples are named ExampleFoo
pattern='Example([^a-z].*)?'
@@ -396,8 +401,9 @@
echo 'var tests = []testing.InternalTest {'
for i in $tests
do
+ n=$(testname $i)
j=$(localname $i)
- echo ' {"'$i'", '$j'},'
+ echo ' {"'$n'", '$j'},'
done
echo '}'
@@ -407,8 +413,9 @@
echo 'var benchmarks = []testing.InternalBenchmark{ //'
for i in $benchmarks
do
+ n=$(testname $i)
j=$(localname $i)
- echo ' {"'$i'", '$j'},'
+ echo ' {"'$n'", '$j'},'
done
echo '}'
@@ -417,8 +424,9 @@
# This doesn't work because we don't pick up the output.
#for i in $examples
#do
+ # n=$(testname $i)
# j=$(localname $i)
- # echo ' {"'$i'", '$j', ""},'
+ # echo ' {"'$n'", '$j', ""},'
#done
echo '}'
===================================================================
@@ -12,7 +12,7 @@ package main
import (
"io/ioutil" // GCCGO_ERROR "imported and not used"
"net/http"
- "os"
+ "os" // GCCGO_ERROR "imported and not used"
)
func makeHandler(fn func(http.ResponseWriter, *http.Request, string)) http.HandlerFunc {
===================================================================
@@ -38,9 +38,9 @@ func BenchmarkSlowNonASCII(b *testing.B)
}
func main() {
- os.Args = []string{os.Args[0], "-test.benchtime=0.1"}
+ os.Args = []string{os.Args[0], "-test.benchtime=100ms"}
flag.Parse()
-
+
rslow := testing.Benchmark(BenchmarkSlowNonASCII)
rfast := testing.Benchmark(BenchmarkFastNonASCII)
tslow := rslow.NsPerOp()