From 27507d611cd81d3f5a1213f126e6b371245dc6a2 Mon Sep 17 00:00:00 2001 From: Oscar Smith Date: Mon, 15 Sep 2025 20:11:33 -0400 Subject: [PATCH 01/10] Make version of opaque closure constructor in world (#56823) Successor to https://github.com/JuliaLang/julia/pull/56808 which enables an opaque closure to be constructed with a specified world age (if, for example, the current world age isn't valid due to being in a generated function). Required for https://github.com/EnzymeAD/Reactant.jl/pull/365 (cherry picked from commit e46cc1ac7db4cdf5c7eb0cd98be1e4732137ddda) --- src/jl_exported_funcs.inc | 2 ++ src/opaque_closure.c | 18 +++++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index dcbe0295c40ae..61420c7306de9 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -334,6 +334,8 @@ XX(jl_new_method_table) \ XX(jl_new_method_uninit) \ XX(jl_new_module) \ + XX(jl_new_opaque_closure_from_code_info) \ + XX(jl_new_opaque_closure_from_code_info_in_world) \ XX(jl_new_primitivetype) \ XX(jl_new_struct) \ XX(jl_new_structt) \ diff --git a/src/opaque_closure.c b/src/opaque_closure.c index 2e39d5965b45a..8561449216d00 100644 --- a/src/opaque_closure.c +++ b/src/opaque_closure.c @@ -28,7 +28,7 @@ JL_DLLEXPORT int jl_is_valid_oc_argtype(jl_tupletype_t *argt, jl_method_t *sourc } static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub, - jl_value_t *source_, jl_value_t *captures, int do_compile) + jl_value_t *source_, jl_value_t *captures, int do_compile, size_t world) { if (!jl_is_tuple_type((jl_value_t*)argt)) { jl_error("OpaqueClosure argument tuple must be a tuple type"); @@ -61,7 +61,6 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t } } jl_task_t *ct = jl_current_task; - size_t world = ct->world_age; jl_code_instance_t *ci = NULL; if (do_compile) { ci = jl_compile_method_internal(mi, world); @@ -140,13 +139,13 @@ jl_opaque_closure_t *jl_new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_ { jl_value_t *captures = jl_f_tuple(NULL, env, nenv); JL_GC_PUSH1(&captures); - jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, source_, captures, do_compile); + jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, source_, captures, do_compile, jl_current_task->world_age); JL_GC_POP(); return oc; } -JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub, - jl_module_t *mod, jl_code_info_t *ci, int lineno, jl_value_t *file, int nargs, int isva, jl_value_t *env, int do_compile, int isinferred) +JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info_in_world(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub, + jl_module_t *mod, jl_code_info_t *ci, int lineno, jl_value_t *file, int nargs, int isva, jl_value_t *env, int do_compile, int isinferred, size_t world) { jl_value_t *root = NULL, *sigtype = NULL; jl_code_instance_t *inst = NULL; @@ -156,7 +155,6 @@ JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tuplet root = jl_new_struct(jl_linenumbernode_type, root, file); jl_method_t *meth = jl_make_opaque_closure_method(mod, jl_nothing, nargs, root, ci, isva, isinferred); root = (jl_value_t*)meth; - size_t world = jl_current_task->world_age; // these are only legal in the current world since they are not in any tables jl_atomic_store_release(&meth->primary_world, world); @@ -172,11 
+170,17 @@ JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tuplet jl_mi_cache_insert(mi, inst); } - jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, root, env, do_compile); + jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, root, env, do_compile, world); JL_GC_POP(); return oc; } +JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub, + jl_module_t *mod, jl_code_info_t *ci, int lineno, jl_value_t *file, int nargs, int isva, jl_value_t *env, int do_compile, int isinferred) +{ + return jl_new_opaque_closure_from_code_info_in_world(argt, rt_lb, rt_ub, mod, ci, lineno, file, nargs, isva, env, do_compile, isinferred, jl_current_task->world_age); +} + JL_CALLABLE(jl_new_opaque_closure_jlcall) { if (nargs < 5) From 8ecf196d0c20141c824ef530d39aa5fdcec116af Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Wed, 20 Aug 2025 21:08:33 -0300 Subject: [PATCH 02/10] Test: Expand verbose mode (cherry picked from commit 95e132ec10fff6f9564c265390849b56183d8b70) --- src/threading.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/threading.c b/src/threading.c index 655cf26c782b2..9f5c18fe53555 100644 --- a/src/threading.c +++ b/src/threading.c @@ -309,6 +309,8 @@ JL_DLLEXPORT uint64_t jl_get_ptls_rng(void) JL_NOTSAFEPOINT return jl_current_task->ptls->rngseed; } +typedef void (*unw_tls_ensure_func)(void) JL_NOTSAFEPOINT; + // get thread local rng JL_DLLEXPORT void jl_set_ptls_rng(uint64_t new_seed) JL_NOTSAFEPOINT { @@ -394,7 +396,10 @@ jl_ptls_t jl_init_threadtls(int16_t tid) #if !defined(_OS_WINDOWS_) && !defined(JL_DISABLE_LIBUNWIND) && !defined(LLVMLIBUNWIND) // ensures libunwind TLS space for this thread is allocated eagerly // to make unwinding async-signal-safe even when using thread local caches. 
- unw_ensure_tls(); + unw_tls_ensure_func jl_unw_ensure_tls = NULL; + jl_dlsym(jl_exe_handle, "unw_ensure_tls", (void**)&jl_unw_ensure_tls, 0); + if (jl_unw_ensure_tls) + jl_unw_ensure_tls(); #endif return ptls; From 0ac5a66abb7802410cac3b04c7acbbb24335f829 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Wed, 27 Aug 2025 21:59:47 -0400 Subject: [PATCH 03/10] fix Sockets type stability issues (#59404) fixes #59397 (cherry picked from commit c2fd42e8befb6894ba0aaf0c3ca0adbf51798e75) --- base/stream.jl | 2 +- stdlib/Sockets/src/PipeServer.jl | 2 +- stdlib/Sockets/src/Sockets.jl | 18 ++++++++--- stdlib/Sockets/src/addrinfo.jl | 4 +-- test/trimming/Makefile | 12 ++++++-- test/trimming/hello.jl | 9 ++---- test/trimming/trimmability.jl | 52 ++++++++++++++++++++++++++++++++ test/trimming/trimming.jl | 7 +++-- 8 files changed, 86 insertions(+), 20 deletions(-) create mode 100644 test/trimming/trimmability.jl diff --git a/base/stream.jl b/base/stream.jl index 5732a62c2153b..6103d4ff1bb31 100644 --- a/base/stream.jl +++ b/base/stream.jl @@ -378,7 +378,7 @@ end function isopen(x::Union{LibuvStream, LibuvServer}) if x.status == StatusUninit || x.status == StatusInit || x.handle === C_NULL - throw(ArgumentError("$x is not initialized")) + throw(ArgumentError("stream not initialized")) end return x.status != StatusClosed end diff --git a/stdlib/Sockets/src/PipeServer.jl b/stdlib/Sockets/src/PipeServer.jl index 4a8965c8f0462..d0557e9c83c5f 100644 --- a/stdlib/Sockets/src/PipeServer.jl +++ b/stdlib/Sockets/src/PipeServer.jl @@ -86,7 +86,7 @@ function connect!(sock::PipeEndpoint, path::AbstractString) req = Libc.malloc(Base._sizeof_uv_connect) uv_req_set_data(req, C_NULL) ccall(:uv_pipe_connect, Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}), req, sock.handle, path, - @cfunction(uv_connectcb, Cvoid, (Ptr{Cvoid}, Cint))) + @cfunction(uv_connectcb_pipe, Cvoid, (Ptr{Cvoid}, Cint))) sock.status = StatusConnecting iolock_end() return sock diff --git a/stdlib/Sockets/src/Sockets.jl b/stdlib/Sockets/src/Sockets.jl index f9e0f2f88dd78..7214841aaa41a 100644 --- a/stdlib/Sockets/src/Sockets.jl +++ b/stdlib/Sockets/src/Sockets.jl @@ -456,7 +456,7 @@ function send(sock::UDPSocket, ipaddr::IPAddr, port::Integer, msg) finally Base.sigatomic_end() iolock_begin() - q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct) + q = ct.queue; q === nothing || Base.list_deletefirst!(q::Base.IntrusiveLinkedList{Task}, ct) if uv_req_data(uvw) != C_NULL # uvw is still alive, # so make sure we won't get spurious notifications later @@ -474,9 +474,19 @@ end #from `connect` -function uv_connectcb(conn::Ptr{Cvoid}, status::Cint) +function uv_connectcb_tcp(conn::Ptr{Cvoid}, status::Cint) hand = ccall(:jl_uv_connect_handle, Ptr{Cvoid}, (Ptr{Cvoid},), conn) - sock = @handle_as hand LibuvStream + sock = @handle_as hand TCPSocket + connectcb(conn, status, hand, sock) +end + +function uv_connectcb_pipe(conn::Ptr{Cvoid}, status::Cint) + hand = ccall(:jl_uv_connect_handle, Ptr{Cvoid}, (Ptr{Cvoid},), conn) + sock = @handle_as hand PipeEndpoint + connectcb(conn, status, hand, sock) +end + +function connectcb(conn::Ptr{Cvoid}, status::Cint, hand::Ptr{Cvoid}, sock::LibuvStream) lock(sock.cond) try if status >= 0 # success @@ -508,7 +518,7 @@ function connect!(sock::TCPSocket, host::Union{IPv4, IPv6}, port::Integer) end host_in = Ref(hton(host.host)) uv_error("connect", ccall(:jl_tcp_connect, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, UInt16, Ptr{Cvoid}, Cint), - sock, host_in, hton(UInt16(port)), 
@cfunction(uv_connectcb, Cvoid, (Ptr{Cvoid}, Cint)), + sock, host_in, hton(UInt16(port)), @cfunction(uv_connectcb_tcp, Cvoid, (Ptr{Cvoid}, Cint)), host isa IPv6)) sock.status = StatusConnecting iolock_end() diff --git a/stdlib/Sockets/src/addrinfo.jl b/stdlib/Sockets/src/addrinfo.jl index f5599b8623a0b..86fe4335e0d37 100644 --- a/stdlib/Sockets/src/addrinfo.jl +++ b/stdlib/Sockets/src/addrinfo.jl @@ -90,7 +90,7 @@ function getalladdrinfo(host::String) finally Base.sigatomic_end() iolock_begin() - q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct) + q = ct.queue; q === nothing || Base.list_deletefirst!(q::Base.IntrusiveLinkedList{Task}, ct) if uv_req_data(req) != C_NULL # req is still alive, # so make sure we don't get spurious notifications later @@ -223,7 +223,7 @@ function getnameinfo(address::Union{IPv4, IPv6}) finally Base.sigatomic_end() iolock_begin() - q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct) + q = ct.queue; q === nothing || Base.list_deletefirst!(q::Base.IntrusiveLinkedList{Task}, ct) if uv_req_data(req) != C_NULL # req is still alive, # so make sure we don't get spurious notifications later diff --git a/test/trimming/Makefile b/test/trimming/Makefile index c3145765655e7..2f29292d10bb5 100644 --- a/test/trimming/Makefile +++ b/test/trimming/Makefile @@ -33,11 +33,14 @@ JULIAC_BUILDSCRIPT := $(shell $(JULIA) -e 'print(joinpath(Sys.BINDIR, Base.DATAR #============================================================================= -release: $(BIN)/hello$(EXE) $(BIN)/basic_jll$(EXE) +release: $(BIN)/hello$(EXE) $(BIN)/trimmability$(EXE) $(BIN)/basic_jll$(EXE) $(BIN)/hello-o.a: $(SRCDIR)/hello.jl $(JULIAC_BUILDSCRIPT) $(JULIA) -t 1 -J $(JULIA_LIBDIR)/julia/sys.$(SHLIB_EXT) --startup-file=no --history-file=no --output-o $@ --output-incremental=no --strip-ir --strip-metadata --experimental --trim $(JULIAC_BUILDSCRIPT) $< --output-exe true +$(BIN)/trimmability-o.a: $(SRCDIR)/trimmability.jl $(JULIAC_BUILDSCRIPT) + $(JULIA) -t 1 -J $(JULIA_LIBDIR)/julia/sys.$(SHLIB_EXT) --startup-file=no --history-file=no --output-o $@ --output-incremental=no --strip-ir --strip-metadata --experimental --trim $(JULIAC_BUILDSCRIPT) $< --output-exe true + $(BIN)/basic_jll-o.a: $(SRCDIR)/basic_jll.jl $(JULIAC_BUILDSCRIPT) $(JULIA) -t 1 -J $(JULIA_LIBDIR)/julia/sys.$(SHLIB_EXT) --startup-file=no --history-file=no --project=$(SRCDIR) -e "using Pkg; Pkg.instantiate()" $(JULIA) -t 1 -J $(JULIA_LIBDIR)/julia/sys.$(SHLIB_EXT) --startup-file=no --history-file=no --project=$(SRCDIR) --output-o $@ --output-incremental=no --strip-ir --strip-metadata --experimental --trim $(JULIAC_BUILDSCRIPT) $< --output-exe true @@ -45,14 +48,17 @@ $(BIN)/basic_jll-o.a: $(SRCDIR)/basic_jll.jl $(JULIAC_BUILDSCRIPT) $(BIN)/hello$(EXE): $(BIN)/hello-o.a $(CC) -o $@ $(WHOLE_ARCHIVE) $< $(NO_WHOLE_ARCHIVE) $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) +$(BIN)/trimmability$(EXE): $(BIN)/trimmability-o.a + $(CC) -o $@ $(WHOLE_ARCHIVE) $< $(NO_WHOLE_ARCHIVE) $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) + $(BIN)/basic_jll$(EXE): $(BIN)/basic_jll-o.a $(CC) -o $@ $(WHOLE_ARCHIVE) $< $(NO_WHOLE_ARCHIVE) $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) -check: $(BIN)/hello$(EXE) $(BIN)/basic_jll$(EXE) +check: $(BIN)/hello$(EXE) $(BIN)/trimmability$(EXE) $(BIN)/basic_jll$(EXE) $(JULIA) --depwarn=error $(SRCDIR)/trimming.jl $< clean: - -rm -f $(BIN)/hello$(EXE) 
$(BIN)/basic_jll$(EXE) $(BIN)/hello-o.a $(BIN)/basic_jll-o.a + -rm -f $(BIN)/hello$(EXE) $(BIN)/trimmability$(EXE) $(BIN)/basic_jll$(EXE) $(BIN)/hello-o.a $(BIN)/trimmability-o.a $(BIN)/basic_jll-o.a .PHONY: release clean check diff --git a/test/trimming/hello.jl b/test/trimming/hello.jl index bb2ca585f3662..620a55b171544 100644 --- a/test/trimming/hello.jl +++ b/test/trimming/hello.jl @@ -1,11 +1,6 @@ -world::String = "world!" -const str = OncePerProcess{String}() do - return "Hello, " * world -end +# Test that minimal executable size stays low function @main(args::Vector{String})::Cint - println(Core.stdout, str()) - println(Core.stdout, PROGRAM_FILE) - foreach(x->println(Core.stdout, x), args) + println(Core.stdout, "Hello, world!") return 0 end diff --git a/test/trimming/trimmability.jl b/test/trimming/trimmability.jl new file mode 100644 index 0000000000000..041f3621216a6 --- /dev/null +++ b/test/trimming/trimmability.jl @@ -0,0 +1,52 @@ +# Test that various constructs support trimming + +using Sockets + +world::String = "world!" +const str = OncePerProcess{String}() do + return "Hello, " * world +end + +abstract type Shape end +struct Square <: Shape + side::Float64 +end +struct Circle <: Shape + radius::Float64 +end +area(s::Square) = s.side^2 +area(c::Circle) = pi*c.radius^2 + +sum_areas(v::Vector{Shape}) = sum(area, v) + +function @main(args::Vector{String})::Cint + println(Core.stdout, str()) + println(Core.stdout, PROGRAM_FILE) + foreach(x->println(Core.stdout, x), args) + + # test map/mapreduce; should work but relies on inlining and other optimizations + # test that you can dispatch to some number of concrete cases + println(Core.stdout, sum_areas(Shape[Circle(1), Square(2)])) + + arr = rand(10) + sorted_arr = sort(arr) + tot = sum(sorted_arr) + tot = prod(sorted_arr) + a = any(x -> x > 0, sorted_arr) + b = all(x -> x >= 0, sorted_arr) + c = map(x -> x^2, sorted_arr) + d = mapreduce(x -> x^2, +, sorted_arr) + # e = reduce(xor, rand(Int, 10)) + + try + sock = connect("localhost", 4900) + if isopen(sock) + write(sock, "Hello") + flush(sock) + close(sock) + end + catch + end + + return 0 +end diff --git a/test/trimming/trimming.jl b/test/trimming/trimming.jl index 5d55ed62b03a8..07e9d92871f19 100644 --- a/test/trimming/trimming.jl +++ b/test/trimming/trimming.jl @@ -6,8 +6,11 @@ bindir = dirname(ARGS[1]) let exe_suffix = splitext(Base.julia_exename())[2] hello_exe = joinpath(bindir, "hello" * exe_suffix) - @test readchomp(`$hello_exe arg1 arg2`) == "Hello, world!\n$hello_exe\narg1\narg2" - @test filesize(hello_exe) < 2_000_000 + @test readchomp(`$hello_exe arg1 arg2`) == "Hello, world!" + @test filesize(hello_exe) < 1_900_000 + + trimmability_exe = joinpath(bindir, "trimmability" * exe_suffix) + @test readchomp(`$trimmability_exe arg1 arg2`) == "Hello, world!\n$trimmability_exe\narg1\narg2\n$(4.0+pi)" basic_jll_exe = joinpath(bindir, "basic_jll" * exe_suffix) lines = split(readchomp(`$basic_jll_exe`), "\n") From 05fb4a135d9f9ee3b92c89266694805f780d821a Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 31 Jul 2025 02:11:35 -0400 Subject: [PATCH 04/10] build: Fix libssh2 source build (#59164) Addresses the same CI failure that https://github.com/JuliaLang/julia/pull/59141 was intended to address. The issue here is twofold. First, in a source build, we failed to install openssl header files. Second, even if we had done this, libssh needs to be explicitly told where to find openssl, otherwise it might prefer systme files. 
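For illustration only (an editor's sketch, not part of the original commit message): with the added flag, the CMake configure step that deps/libssh2.mk drives in a source build would point libssh2 at the freshly built OpenSSL roughly as follows, where <libssh2-src> is a placeholder for the extracted source directory; the options mirror the LIBSSH2_OPTS additions in the diff below.

    cmake <libssh2-src> -DCRYPTO_BACKEND=OpenSSL -DENABLE_ZLIB_COMPRESSION=OFF \
          -DOPENSSL_ROOT_DIR=$(build_prefix)

CMake's FindOpenSSL module honors OPENSSL_ROOT_DIR, so the libssh2 build resolves headers and libraries under the Julia build prefix rather than whatever the system happens to provide.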
(cherry picked from commit 5ddd7215951d096d4bad0f5e153a18930ed39576) --- deps/libssh2.mk | 1 + deps/openssl.mk | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/deps/libssh2.mk b/deps/libssh2.mk index 3f802db15be6d..661e26516c828 100644 --- a/deps/libssh2.mk +++ b/deps/libssh2.mk @@ -22,6 +22,7 @@ LIBSSH2_OPTS += -G"MSYS Makefiles" endif else LIBSSH2_OPTS += -DCRYPTO_BACKEND=OpenSSL -DENABLE_ZLIB_COMPRESSION=OFF +LIBSSH2_OPTS += -DOPENSSL_ROOT_DIR=$(build_prefix) endif ifneq (,$(findstring $(OS),Linux FreeBSD OpenBSD)) diff --git a/deps/openssl.mk b/deps/openssl.mk index 6f96717b2fb74..705303432c2c6 100644 --- a/deps/openssl.mk +++ b/deps/openssl.mk @@ -72,14 +72,21 @@ ifeq ($(OS),$(BUILD_OS)) endif echo 1 > $@ +# Override bindir and only install runtime libraries, otherwise they'll go into build_depsbindir. +OPENSSL_INSTALL = \ + mkdir -p $2$$(build_shlibdir) && \ + $$(MAKE) -C $1 install_dev $$(MAKE_COMMON) bindir=$$(build_shlibdir) $3 DESTDIR="$2" + +OPENSSL_POST_INSTALL := \ + $(WIN_MAKE_HARD_LINK) $(build_bindir)/libcrypto-*.dll $(build_bindir)/libcrypto.dll && \ + $(WIN_MAKE_HARD_LINK) $(build_bindir)/libssl-*.dll $(build_bindir)/libssl.dll && \ + $(INSTALL_NAME_CMD)libcrypto.$(SHLIB_EXT) $(build_shlibdir)/libcrypto.$(SHLIB_EXT) && \ + $(INSTALL_NAME_CMD)libssl.$(SHLIB_EXT) $(build_shlibdir)/libssl.$(SHLIB_EXT) && \ + $(INSTALL_NAME_CHANGE_CMD) $(build_shlibdir)/libcrypto.3.dylib @rpath/libcrypto.$(SHLIB_EXT) $(build_shlibdir)/libssl.$(SHLIB_EXT) + $(eval $(call staged-install, \ openssl,openssl-$(OPENSSL_VER), \ - MAKE_INSTALL,,, \ - $$(WIN_MAKE_HARD_LINK) $(build_bindir)/libcrypto-*.dll $(build_bindir)/libcrypto.dll && \ - $$(WIN_MAKE_HARD_LINK) $(build_bindir)/libssl-*.dll $(build_bindir)/libssl.dll && \ - $$(INSTALL_NAME_CMD)libcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libcrypto.$$(SHLIB_EXT) && \ - $$(INSTALL_NAME_CMD)libssl.$$(SHLIB_EXT) $$(build_shlibdir)/libssl.$$(SHLIB_EXT) && \ - $$(INSTALL_NAME_CHANGE_CMD) $$(build_shlibdir)/libcrypto.3.dylib @rpath/libcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libssl.$$(SHLIB_EXT))) + OPENSSL_INSTALL,,,$(OPENSSL_POST_INSTALL))) clean-openssl: -rm -f $(BUILDDIR)/-openssl-$(OPENSSL_VER)/build-configured $(BUILDDIR)/-openssl-$(OPENSSL_VER)/build-compiled From c124bf92a4bcf8063c68b7fffc2d5a2186a1ef9c Mon Sep 17 00:00:00 2001 From: Leon Date: Tue, 16 Sep 2025 02:09:05 +1000 Subject: [PATCH 05/10] docs: Add headers for easier Core, Base and stdlib reference (#55311) This adds one new section (h2) and a nested subsection (h3) to the landing page of the Julia documentation website and associated PDF file. The new section provides a quick overview of the differences between `Core`, `Base` and the standard library, as well as a list of links to standard library packages. The list of standard library packages is thus duplicated in the website sidebar, but I don't think this is an issue because the sidebar is primarily intended for navigation, whereas this list is primarily intended to be used for referencing purposes. Partially addresses #28712, although I don't provide short descriptions of each standard library package. I think the appropriate place for that would be in module-level docstrings of the packages themselves, which are not currently available. --------- Co-authored-by: Viral B. 
Shah (cherry picked from commit abb104c56f5a43e6511bb5cfa1a6586a4fc8cd46) --- doc/src/base/base.md | 4 ++-- doc/src/index.md | 39 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/doc/src/base/base.md b/doc/src/base/base.md index e6c8ff554d494..d274ec8f01721 100644 --- a/doc/src/base/base.md +++ b/doc/src/base/base.md @@ -112,14 +112,14 @@ where [] ``` -## Standard Modules +## [Standard Modules](@id standard-modules) ```@docs Main Core Base ``` -## Base Submodules +## [Base Submodules](@id base-submodules) ```@docs Base.Broadcast Base.Docs diff --git a/doc/src/index.md b/doc/src/index.md index 8342ff448625d..7d781f25b4235 100644 --- a/doc/src/index.md +++ b/doc/src/index.md @@ -76,7 +76,7 @@ and [Ruby](https://en.wikipedia.org/wiki/Ruby_(programming_language)). The most significant departures of Julia from typical dynamic languages are: - * The core language imposes very little; Julia Base and the standard library are written in Julia itself, including + * The core language imposes very little; [Julia Base and the standard library](@ref man-core-base-and-stdlib) are written in Julia itself, including primitive operations like integer arithmetic * A rich language of types for constructing and describing objects, that can also optionally be used to make type declarations @@ -126,3 +126,40 @@ language. In addition to the above, some advantages of Julia over comparable sys * Call C functions directly (no wrappers or special APIs needed) * Powerful shell-like capabilities for managing other processes * Lisp-like macros and other metaprogramming facilities + +## [Julia Standard Modules and the Standard Library](@id man-standard-modules-stdlib) + +The Julia runtime comes with [standard modules](@ref standard-modules), +which are essential namespaces that are usually loaded automatically. + +```@docs; canonical=false +Core +Base +``` + +Julia's `Base` module contains various [useful submodules](@ref base-submodules). + +### [The Standard Library](@id man-stdlib) + +The Julia standard library contains additional, commonly used packages that are installed alongside the Julia runtime by default. +To use a standard library package, it is first necessary to load the package with a [`using`](@ref) or [`import`](@ref) statement. +Links to available standard library packages are provided below, +and may also be found in the website sidebar. +Their source code is available in the `Sys.STDLIB` directory of a Julia installation. + +```@eval +import Markdown +list = sort(filter(x -> match(r"_jll$", x) === nothing, readdir(Sys.STDLIB))) +Markdown.parse(join("- [`" .* list .* "`](stdlib/" .* list .* ".html)", "\n")) +``` + +Julia also provides various standard, pre-built binary libraries +of established software that is written in other languages. +By convention, these packages have names that end with `_jll`. +The [`using`](@ref) statement can load symbol names from these binary libraries: + +```@eval +import Markdown +list = sort(filter(x -> match(r"_jll$", x) !== nothing, readdir(Sys.STDLIB))) +Markdown.parse(join("- [`" .* list .* "`](stdlib/" .* list .* ".html)", "\n")) +``` From 878ff5ec5cc98cc181e68edf2e767b12303c3c8c Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 15 Sep 2025 20:33:37 -0400 Subject: [PATCH 06/10] codegen: mark write barrier field load as volatile (#59559) This is as an alternative to changing all allocation functions to mutating all memory and returning an aliasing pointer. 
This operates usually late in the pipeline, so either should not make much difference, but I think it should suffice to mark this volatile to prevent any de-duplication of this load, and this should also be most conservative for GC but more liberal for other optimizations. Fixes #59547 Produced with dubious help by Claude. (cherry picked from commit 218f691d16edbda33647d2b938b882a7ac08f057) --- Compiler/test/codegen.jl | 6 +-- src/llvm-late-gc-lowering.cpp | 3 ++ test/llvmpasses/gc-writebarrier-volatile.ll | 41 +++++++++++++++++++++ 3 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 test/llvmpasses/gc-writebarrier-volatile.ll diff --git a/Compiler/test/codegen.jl b/Compiler/test/codegen.jl index be2df190764a0..32c279da9bda6 100644 --- a/Compiler/test/codegen.jl +++ b/Compiler/test/codegen.jl @@ -133,14 +133,14 @@ if !is_debug_build && opt_level > 0 # Array test_loads_no_call(strip_debug_calls(get_llvm(sizeof, Tuple{Vector{Int}})), [Iptr]) # As long as the eltype is known we don't need to load the elsize, but do need to check isvector - @test_skip test_loads_no_call(strip_debug_calls(get_llvm(sizeof, Tuple{Array{Any}})), ["atomic $Iptr", "ptr", "ptr", Iptr, Iptr, "ptr", Iptr]) + @test_skip test_loads_no_call(strip_debug_calls(get_llvm(sizeof, Tuple{Array{Any}})), ["atomic volatile $Iptr", "ptr", "ptr", Iptr, Iptr, "ptr", Iptr]) # Memory test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory{Int}})), [Iptr]) # As long as the eltype is known we don't need to load the elsize test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory{Any}})), [Iptr]) # Check that we load the elsize and isunion from the typeof layout - test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory})), [Iptr, "atomic $Iptr", "ptr", "i32", "i16"]) - test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory})), [Iptr, "atomic $Iptr", "ptr", "i32", "i16"]) + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory})), [Iptr, "atomic volatile $Iptr", "ptr", "i32", "i16"]) + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory})), [Iptr, "atomic volatile $Iptr", "ptr", "i32", "i16"]) # Primitive Type size should be folded to a constant test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Ptr})), String[]) diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 25ec6959eddf1..1d262ff7968b0 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -1899,6 +1899,9 @@ Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value * auto &M = *builder.GetInsertBlock()->getModule(); LoadInst *load = builder.CreateAlignedLoad(T_size, addr, M.getDataLayout().getPointerABIAlignment(0), V->getName() + ".tag"); load->setOrdering(AtomicOrdering::Unordered); + // Mark as volatile to prevent optimizers from treating GC tag loads as constants + // since GC mark bits can change during runtime (issue #59547) + load->setVolatile(true); load->setMetadata(LLVMContext::MD_tbaa, tbaa_tag); MDBuilder MDB(load->getContext()); auto *NullInt = ConstantInt::get(T_size, 0); diff --git a/test/llvmpasses/gc-writebarrier-volatile.ll b/test/llvmpasses/gc-writebarrier-volatile.ll new file mode 100644 index 0000000000000..c4bfafdb670f8 --- /dev/null +++ b/test/llvmpasses/gc-writebarrier-volatile.ll @@ -0,0 +1,41 @@ +; This file is a part of Julia. 
License is MIT: https://julialang.org/license + +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC,gvn)' -S %s | FileCheck %s + +; Test for issue #59547: Ensure write barrier GC tag loads are volatile +; This test verifies that the LateLowerGCFrame pass marks GC tag loads as volatile +; to prevent GVN from incorrectly constant-folding them, which would eliminate +; necessary write barrier checks. + +@tag = external addrspace(10) global {}, align 16 + +declare void @julia.write_barrier({} addrspace(10)*, {} addrspace(10)*) +declare {}*** @julia.get_pgcstack() +declare {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) + +; Test that write barrier expansion produces volatile GC tag loads +; CHECK-LABEL: @test_writebarrier_volatile_tags +define {} addrspace(10)* @test_writebarrier_volatile_tags() { +top: + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** + %parent = call {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) + %child = call {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) + call void @julia.write_barrier({} addrspace(10)* %parent, {} addrspace(10)* %child) + ret {} addrspace(10)* %parent + +; The critical test: GC tag loads must be volatile to prevent constant folding +; CHECK: load atomic volatile i64, ptr {{.*}} unordered, align 8, {{.*}}!tbaa +; CHECK: and i64 {{.*}}, 3 +; CHECK: icmp eq i64 {{.*}}, 3 +; CHECK: br i1 {{.*}}, label %may_trigger_wb, label + +; CHECK: may_trigger_wb: +; CHECK: load atomic volatile i64, ptr {{.*}} unordered, align 8, {{.*}}!tbaa +; CHECK: and i64 {{.*}}, 1 +; CHECK: icmp eq i64 {{.*}}, 0 +; CHECK: br i1 {{.*}}, label %trigger_wb, label + +; CHECK: trigger_wb: +; CHECK: call void @ijl_gc_queue_root(ptr {{.*}}) +} From 5f948592c2adc8d0430a5d182d268e9ff9670743 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Tue, 16 Sep 2025 22:12:46 -0500 Subject: [PATCH 07/10] Only apply Base.Sort.SubArrayOptimization when iszero(v.offset1) (#59572) Fixes #59569 Thanks @N5N3 for finding the bug! For 1.13, we could re-enable this optimization and propagate the information to the sub-alg or have the sub-alg's embedded indices be relative to `kw.lo` rather than absolute. For backports, this minimal change is more appropriate. (cherry picked from commit 067b0133a8ec3f5c81d89525cd6cb4f25df6cb9c) --- base/sort.jl | 7 +++++-- test/sorting.jl | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 8254f56b3f952..db865150edf57 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -563,12 +563,15 @@ function _sort!(v::UnwrappableSubArray, a::SubArrayOptimization, o::Ordering, kw @getkw lo hi # @assert v.stride1 == 1 parent = v.parent - if parent isa Array && !(parent isa Vector) && hi - lo < 100 + if parent isa Array && !(parent isa Vector) && hi - lo < 100 || !iszero(v.offset1) # vec(::Array{T, โ‰ 1}) allocates and is therefore somewhat expensive. # We don't want that for small inputs. + + # Additionally, if offset1 is non-zero, then this optimization is incompatible with + # algorithms that track absolute first and last indices (e.g. 
ScratchQuickSort) _sort!(v, a.next, o, kw) else - _sort!(vec(parent), a.next, o, (;kw..., lo = lo + v.offset1, hi = hi + v.offset1)) + _sort!(vec(parent), a.next, o, kw) end end diff --git a/test/sorting.jl b/test/sorting.jl index e16b30de5bfc8..a0bbc51a3f239 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -1122,6 +1122,11 @@ end end end +@testset "partialsort! for UnwrappableSubArray with non-zero offset on 1.11 (#59569)" begin + a = reshape(6000:-1:1, 1000, :) |> collect; + @test partialsort!(view(copy(a), :, 6), 500:501) == [500, 501] +end + # This testset is at the end of the file because it is slow. @testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128, From 6be73b43ecb39ee43ce6718a2396f2e65e9f5780 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Wed, 17 Sep 2025 12:47:55 -0400 Subject: [PATCH 08/10] Disable broken trimming `mapreduce` test --- test/trimming/trimmability.jl | 8 +++++--- test/trimming/trimming.jl | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/test/trimming/trimmability.jl b/test/trimming/trimmability.jl index 041f3621216a6..acba8244534ef 100644 --- a/test/trimming/trimmability.jl +++ b/test/trimming/trimmability.jl @@ -24,9 +24,11 @@ function @main(args::Vector{String})::Cint println(Core.stdout, PROGRAM_FILE) foreach(x->println(Core.stdout, x), args) - # test map/mapreduce; should work but relies on inlining and other optimizations - # test that you can dispatch to some number of concrete cases - println(Core.stdout, sum_areas(Shape[Circle(1), Square(2)])) + # broken on 1.12 + # + # # test map/mapreduce; should work but relies on inlining and other optimizations + # # test that you can dispatch to some number of concrete cases + # println(Core.stdout, sum_areas(Shape[Circle(1), Square(2)])) arr = rand(10) sorted_arr = sort(arr) diff --git a/test/trimming/trimming.jl b/test/trimming/trimming.jl index 07e9d92871f19..d46ad3bb74e34 100644 --- a/test/trimming/trimming.jl +++ b/test/trimming/trimming.jl @@ -10,7 +10,7 @@ let exe_suffix = splitext(Base.julia_exename())[2] @test filesize(hello_exe) < 1_900_000 trimmability_exe = joinpath(bindir, "trimmability" * exe_suffix) - @test readchomp(`$trimmability_exe arg1 arg2`) == "Hello, world!\n$trimmability_exe\narg1\narg2\n$(4.0+pi)" + @test readchomp(`$trimmability_exe arg1 arg2`) == "Hello, world!\n$trimmability_exe\narg1\narg2" basic_jll_exe = joinpath(bindir, "basic_jll" * exe_suffix) lines = split(readchomp(`$basic_jll_exe`), "\n") From db87ab5853ced1c63acb360dc934823a7e3daae2 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 19 Sep 2025 10:44:49 +0200 Subject: [PATCH 09/10] move `REPL/src/Terminals.jl` into Base to work around significant invalidations from it (#59590) (cherry picked from commit a25decdcdcf01141ce3e8bbed901e53797dcbbf8) --- base/Base.jl | 1 + {stdlib/REPL/src => base}/Terminals.jl | 0 contrib/generate_precompile.jl | 8 ++++++++ stdlib/REPL/src/REPL.jl | 3 +-- 4 files changed, 10 insertions(+), 2 deletions(-) rename {stdlib/REPL/src => base}/Terminals.jl (100%) diff --git a/base/Base.jl b/base/Base.jl index f8905bf49ce79..5940f32d4bd71 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -174,6 +174,7 @@ using .Filesystem include("cmd.jl") include("process.jl") include("terminfo.jl") +include("Terminals.jl") # Moved from REPL to reduce invalidations include("secretbuffer.jl") # core math functions diff --git a/stdlib/REPL/src/Terminals.jl b/base/Terminals.jl similarity index 100% rename from stdlib/REPL/src/Terminals.jl rename to base/Terminals.jl 
diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index 45b4a9a485a4d..88972be56ab4d 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -34,6 +34,14 @@ hardcoded_precompile_statements = """ precompile(Base.unsafe_string, (Ptr{UInt8},)) precompile(Base.unsafe_string, (Ptr{Int8},)) +# used by REPL +precompile(Tuple{typeof(Base.getproperty), Base.Terminals.TTYTerminal, Symbol}) +precompile(Tuple{typeof(Base.reseteof), Base.Terminals.TTYTerminal}) +precompile(Tuple{typeof(Base.Terminals.enable_bracketed_paste), Base.Terminals.TTYTerminal}) +precompile(Tuple{typeof(Base.Terminals.width), Base.Terminals.TTYTerminal}) +precompile(Tuple{typeof(Base.Terminals.height), Base.Terminals.TTYTerminal}) +precompile(Tuple{typeof(Base.write), Base.Terminals.TTYTerminal, Array{UInt8, 1}}) + # loading.jl - without these each precompile worker would precompile these because they're hit before pkgimages are loaded precompile(Base.__require, (Module, Symbol)) precompile(Base.__require, (Base.PkgId,)) diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index d0473d8f8d6f1..b0abb6e78f997 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -59,8 +59,7 @@ import Base: _displaysize(io::IO) = displaysize(io)::Tuple{Int,Int} -include("Terminals.jl") -using .Terminals +using Base.Terminals abstract type AbstractREPL end From 8327e55dfb33c682ed0589a0607b8daac4f43a0a Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 17 Sep 2025 13:05:59 -0400 Subject: [PATCH 10/10] optimizations: better modeling and codegen for apply and svec calls (#59548) - Use svec instead of tuple for arguments (better match for ABI which will require boxes) - Directly forward single svec argument, both runtime and codegen, without copying. - Optimize all consistant builtin functions of constant arguments, not just ones with special tfuncs. Reducing code duplication and divergence. - Codegen for `svec()` directly, so optimizer can see each store (and doesn't have to build the whole thing on the stack first). Written with help by Claude --- Compiler/src/ssair/passes.jl | 46 +++++++++++++++++++++++ Compiler/src/tfuncs.jl | 43 +++++++++++++-------- Compiler/test/effects.jl | 2 +- base/essentials.jl | 6 +-- src/builtin_proto.h | 1 + src/builtins.c | 18 ++++++++- src/cgutils.cpp | 29 ++++++++++++--- src/codegen.cpp | 72 +++++++++++++++++++++++++++++++----- 8 files changed, 179 insertions(+), 38 deletions(-) diff --git a/Compiler/src/ssair/passes.jl b/Compiler/src/ssair/passes.jl index 46ed299167060..1db3b84afd882 100644 --- a/Compiler/src/ssair/passes.jl +++ b/Compiler/src/ssair/passes.jl @@ -872,6 +872,49 @@ function perform_lifting!(compact::IncrementalCompact, return Pair{Any, PhiNest}(stmt_val, PhiNest(visited_philikes, lifted_philikes, lifted_leaves, reverse_mapping, walker_callback)) end +function lift_apply_args!(compact::IncrementalCompact, idx::Int, stmt::Expr, ๐•ƒโ‚’::AbstractLattice) + # Handle _apply_iterate calls: convert arguments to use `Core.svec`. The behavior of Core.svec (with boxing) better matches the ABI of codegen. 
+ compact[idx] = nothing + for i in 4:length(stmt.args) # Skip iterate function, f, and first iterator + arg = stmt.args[i] + arg_type = argextype(arg, compact) + svec_args = nothing + if isa(arg_type, DataType) && arg_type.name === Tuple.name + if isa(arg, SSAValue) + arg_stmt = compact[arg][:stmt] + if is_known_call(arg_stmt, Core.tuple, compact) + svec_args = copy(arg_stmt.args) + end + end + if svec_args === nothing + # Fallback path: generate getfield calls for tuple elements + tuple_length = length(arg_type.parameters) + if tuple_length > 0 && !isvarargtype(arg_type.parameters[tuple_length]) + svec_args = Vector{Any}(undef, tuple_length + 1) + for j in 1:tuple_length + getfield_call = Expr(:call, GlobalRef(Core, :getfield), arg, j) + getfield_type = arg_type.parameters[j] + inst = compact[SSAValue(idx)] + getfield_ssa = insert_node!(compact, SSAValue(idx), NewInstruction(getfield_call, getfield_type, NoCallInfo(), inst[:line], inst[:flag])) + svec_args[j + 1] = getfield_ssa + end + end + end + end + # Create Core.svec call if we have arguments + if svec_args !== nothing + svec_args[1] = GlobalRef(Core, :svec) + new_svec_call = Expr(:call) + new_svec_call.args = svec_args + inst = compact[SSAValue(idx)] + new_svec_ssa = insert_node!(compact, SSAValue(idx), NewInstruction(new_svec_call, SimpleVector, NoCallInfo(), inst[:line], inst[:flag])) + stmt.args[i] = new_svec_ssa + end + end + compact[idx] = stmt + nothing +end + function lift_svec_ref!(compact::IncrementalCompact, idx::Int, stmt::Expr) length(stmt.args) != 3 && return @@ -1375,6 +1418,9 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) compact[SSAValue(idx)] = (compact[enter_ssa][:stmt]::EnterNode).scope elseif isexpr(stmt, :new) refine_new_effects!(๐•ƒโ‚’, compact, idx, stmt) + elseif is_known_call(stmt, Core._apply_iterate, compact) + length(stmt.args) >= 4 || continue + lift_apply_args!(compact, idx, stmt, ๐•ƒโ‚’) end continue end diff --git a/Compiler/src/tfuncs.jl b/Compiler/src/tfuncs.jl index b49885aa46721..33c866c401811 100644 --- a/Compiler/src/tfuncs.jl +++ b/Compiler/src/tfuncs.jl @@ -585,6 +585,15 @@ end add_tfunc(nfields, 1, 1, nfields_tfunc, 1) add_tfunc(Core._expr, 1, INT_INF, @nospecs((๐•ƒ::AbstractLattice, args...)->Expr), 100) add_tfunc(svec, 0, INT_INF, @nospecs((๐•ƒ::AbstractLattice, args...)->SimpleVector), 20) + +@nospecs function _svec_len_tfunc(๐•ƒ::AbstractLattice, s) + if isa(s, Const) && isa(s.val, SimpleVector) + return Const(length(s.val)) + end + return Int +end +add_tfunc(Core._svec_len, 1, 1, _svec_len_tfunc, 1) + @nospecs function _svec_ref_tfunc(๐•ƒ::AbstractLattice, s, i) if isa(s, Const) && isa(i, Const) s, i = s.val, i.val @@ -1986,15 +1995,8 @@ function tuple_tfunc(๐•ƒ::AbstractLattice, argtypes::Vector{Any}) # UnionAll context is missing around this. 
pop!(argtypes) end - all_are_const = true - for i in 1:length(argtypes) - if !isa(argtypes[i], Const) - all_are_const = false - break - end - end - if all_are_const - return Const(ntuple(i::Int->argtypes[i].val, length(argtypes))) + if is_all_const_arg(argtypes, 1) # repeated from builtin_tfunction for the benefit of callers that use this tfunc directly + return Const(tuple(collect_const_args(argtypes, 1)...)) end params = Vector{Any}(undef, length(argtypes)) anyinfo = false @@ -2359,6 +2361,9 @@ function _builtin_nothrow(๐•ƒ::AbstractLattice, @nospecialize(f::Builtin), argt elseif f === Core.compilerbarrier na == 2 || return false return compilerbarrier_nothrow(argtypes[1], nothing) + elseif f === Core._svec_len + na == 1 || return false + return _svec_len_tfunc(๐•ƒ, argtypes[1]) isa Const elseif f === Core._svec_ref na == 2 || return false return _svec_ref_tfunc(๐•ƒ, argtypes[1], argtypes[2]) isa Const @@ -2366,7 +2371,7 @@ function _builtin_nothrow(๐•ƒ::AbstractLattice, @nospecialize(f::Builtin), argt return false end -# known to be always effect-free (in particular nothrow) +# known to be always effect-free (in particular also nothrow) const _PURE_BUILTINS = Any[ tuple, svec, @@ -2395,6 +2400,8 @@ const _CONSISTENT_BUILTINS = Any[ donotdelete, memoryrefnew, memoryrefoffset, + Core._svec_len, + Core._svec_ref, ] # known to be effect-free (but not necessarily nothrow) @@ -2419,6 +2426,7 @@ const _EFFECT_FREE_BUILTINS = [ Core.throw_methoderror, getglobal, compilerbarrier, + Core._svec_len, Core._svec_ref, ] @@ -2453,6 +2461,7 @@ const _ARGMEM_BUILTINS = Any[ replacefield!, setfield!, swapfield!, + Core._svec_len, Core._svec_ref, ] @@ -2637,7 +2646,7 @@ function builtin_effects(๐•ƒ::AbstractLattice, @nospecialize(f::Builtin), argty else if contains_is(_CONSISTENT_BUILTINS, f) consistent = ALWAYS_TRUE - elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned || f === Core._svec_ref + elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned || f === Core._svec_len || f === Core._svec_ref consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY elseif f === Core._typevar || f === Core.memorynew consistent = CONSISTENT_IF_NOTRETURNED @@ -2746,11 +2755,12 @@ end function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtypes::Vector{Any}, sv::Union{AbsIntState, Nothing}) ๐•ƒแตข = typeinf_lattice(interp) - if isa(f, IntrinsicFunction) - if is_pure_intrinsic_infer(f) && all(@nospecialize(a) -> isa(a, Const), argtypes) - argvals = anymap(@nospecialize(a) -> (a::Const).val, argtypes) + # Early constant evaluation for foldable builtins with all const args + if isa(f, IntrinsicFunction) ? 
is_pure_intrinsic_infer(f) : (f in _PURE_BUILTINS || (f in _CONSISTENT_BUILTINS && f in _EFFECT_FREE_BUILTINS)) + if is_all_const_arg(argtypes, 1) + argvals = collect_const_args(argtypes, 1) try - # unroll a few cases which have specialized codegen + # unroll a few common cases for better codegen if length(argvals) == 1 return Const(f(argvals[1])) elseif length(argvals) == 2 @@ -2764,6 +2774,8 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp return Bottom end end + end + if isa(f, IntrinsicFunction) iidx = Int(reinterpret(Int32, f)) + 1 if iidx < 0 || iidx > length(T_IFUNC) # unknown intrinsic @@ -2790,6 +2802,7 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp end tf = T_FFUNC_VAL[fidx] end + if hasvarargtype(argtypes) if length(argtypes) - 1 > tf[2] # definitely too many arguments diff --git a/Compiler/test/effects.jl b/Compiler/test/effects.jl index 720825aa145f8..79d16602a8d92 100644 --- a/Compiler/test/effects.jl +++ b/Compiler/test/effects.jl @@ -1466,7 +1466,7 @@ end let effects = Base.infer_effects((Core.SimpleVector,Int); optimize=false) do svec, i Core._svec_ref(svec, i) end - @test !Compiler.is_consistent(effects) + @test Compiler.is_consistent(effects) @test Compiler.is_effect_free(effects) @test !Compiler.is_nothrow(effects) @test Compiler.is_terminates(effects) diff --git a/base/essentials.jl b/base/essentials.jl index b817e53ec6c4d..412629411e36a 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -934,11 +934,7 @@ setindex!(A::MemoryRef{Any}, @nospecialize(x)) = (memoryrefset!(A, x, :not_atomi getindex(v::SimpleVector, i::Int) = (@_foldable_meta; Core._svec_ref(v, i)) function length(v::SimpleVector) - @_total_meta - t = @_gc_preserve_begin v - len = unsafe_load(Ptr{Int}(pointer_from_objref(v))) - @_gc_preserve_end t - return len + Core._svec_len(v) end firstindex(v::SimpleVector) = 1 lastindex(v::SimpleVector) = length(v) diff --git a/src/builtin_proto.h b/src/builtin_proto.h index 586d948f722c1..53414d190906c 100644 --- a/src/builtin_proto.h +++ b/src/builtin_proto.h @@ -19,6 +19,7 @@ extern "C" { XX(_primitivetype,"_primitivetype") \ XX(_setsuper,"_setsuper!") \ XX(_structtype,"_structtype") \ + XX(_svec_len,"_svec_len") \ XX(_svec_ref,"_svec_ref") \ XX(_typebody,"_typebody!") \ XX(_typevar,"_typevar") \ diff --git a/src/builtins.c b/src/builtins.c index 0ff1a1cf61491..c57a677198754 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -693,9 +693,15 @@ JL_CALLABLE(jl_f__apply_iterate) return (jl_value_t*)t; } } - else if (f == BUILTIN(tuple) && jl_is_tuple(args[1])) { - return args[1]; + else if (f == BUILTIN(tuple)) { + if (jl_is_tuple(args[1])) + return args[1]; + if (jl_is_svec(args[1])) + return jl_f_tuple(NULL, jl_svec_data(args[1]), jl_svec_len(args[1])); } + // optimization for `f(svec...)` + if (jl_is_svec(args[1])) + return jl_apply_generic(f, jl_svec_data(args[1]), jl_svec_len(args[1])); } // estimate how many real arguments we appear to have size_t precount = 1; @@ -2091,6 +2097,14 @@ JL_CALLABLE(jl_f__compute_sparams) return (jl_value_t*)env; } +JL_CALLABLE(jl_f__svec_len) +{ + JL_NARGS(_svec_len, 1, 1); + jl_svec_t *s = (jl_svec_t*)args[0]; + JL_TYPECHK(_svec_len, simplevector, (jl_value_t*)s); + return jl_box_long(jl_svec_len(s)); +} + JL_CALLABLE(jl_f__svec_ref) { JL_NARGS(_svec_ref, 2, 2); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 65fbd80c303b5..138fa54949cf8 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -2228,6 +2228,9 @@ static jl_cgval_t 
typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j } Value *instr = nullptr; if (!isboxed && jl_is_genericmemoryref_type(jltype)) { + //We don't specify the stronger expected memory ordering here because of fears it may interfere with vectorization and other optimizations + //if (Order == AtomicOrdering::NotAtomic) + // Order = AtomicOrdering::Monotonic; // load these FCA as individual fields, so LLVM does not need to split them later Value *fld0 = ctx.builder.CreateStructGEP(elty, ptr, 0); LoadInst *load0 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(0), fld0, Align(alignment), false); @@ -2401,11 +2404,26 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, instr = load; } if (r) { - StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment)); - store->setOrdering(Order == AtomicOrdering::NotAtomic && isboxed ? AtomicOrdering::Release : Order); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); - ai.decorateInst(store); + if (false && !isboxed && Order == AtomicOrdering::NotAtomic && jl_is_genericmemoryref_type(jltype)) { + // if enabled, store these FCA as individual fields, so LLVM does not need to split them later and they can use release ordering + assert(r->getType() == ctx.types().T_jlgenericmemory); + Value *f1 = ctx.builder.CreateExtractValue(r, 0); + Value *f2 = ctx.builder.CreateExtractValue(r, 1); + static_assert(offsetof(jl_genericmemoryref_t, ptr_or_offset) == 0, "wrong field order"); + StoreInst *store = ctx.builder.CreateAlignedStore(f1, ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, ptr, 0), Align(alignment)); + store->setOrdering(AtomicOrdering::Release); + ai.decorateInst(store); + store = ctx.builder.CreateAlignedStore(f2, ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, ptr, 1), Align(alignment)); + store->setOrdering(AtomicOrdering::Release); + ai.decorateInst(store); + } + else { + StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment)); + store->setOrdering(Order == AtomicOrdering::NotAtomic && isboxed ? AtomicOrdering::Release : Order); + ai.decorateInst(store); + } } else { assert(Order == AtomicOrdering::NotAtomic && !isboxed && rhs.typ == jltype); @@ -4377,10 +4395,11 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg for (size_t i = nargs; i < nf; i++) { if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, strctinfo.tbaa); - ai.decorateInst(ctx.builder.CreateAlignedStore( + auto *store = ctx.builder.CreateAlignedStore( ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), emit_ptrgep(ctx, strct, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1), - Align(1))); + Align(1)); + ai.decorateInst(store); } } // TODO: verify that nargs <= nf (currently handled by front-end) diff --git a/src/codegen.cpp b/src/codegen.cpp index 6c4543fcf0049..3189171e9b30a 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -4072,21 +4072,38 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } } - else if ((f == BUILTIN(_apply_iterate) && nargs == 3) && ctx.vaSlot > 0) { + else if (f == BUILTIN(_apply_iterate) && nargs == 3) { // turn Core._apply_iterate(iter, f, Tuple) ==> f(Tuple...) 
using the jlcall calling convention if Tuple is the va allocation - if (LoadInst *load = dyn_cast_or_null(argv[3].V)) { - if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) { - Value *theF = boxed(ctx, argv[2]); - Value *nva = emit_n_varargs(ctx); + if (ctx.vaSlot > 0) { + if (LoadInst *load = dyn_cast_or_null(argv[3].V)) { + if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) { + Value *theF = boxed(ctx, argv[2]); + Value *nva = emit_n_varargs(ctx); #ifdef _P64 - nva = ctx.builder.CreateTrunc(nva, getInt32Ty(ctx.builder.getContext())); + nva = ctx.builder.CreateTrunc(nva, getInt32Ty(ctx.builder.getContext())); #endif - Value *theArgs = emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*)); - Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva }); - *ret = mark_julia_type(ctx, r, true, jl_any_type); - return true; + Value *theArgs = emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*)); + Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva }); + *ret = mark_julia_type(ctx, r, true, jl_any_type); + return true; + } } } + // optimization for _apply_iterate when there is one argument and it is a SimpleVector + const jl_cgval_t &arg = argv[3]; + if (arg.typ == (jl_value_t*)jl_simplevector_type) { + Value *theF = boxed(ctx, argv[2]); + Value *svec_val = boxed(ctx, arg); + Value *svec_len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, decay_derived(ctx, svec_val), Align(ctx.types().sizeof_ptr)); +#ifdef _P64 + svec_len = ctx.builder.CreateTrunc(svec_len, getInt32Ty(ctx.builder.getContext())); +#endif + Value *svec_data = emit_ptrgep(ctx, emit_pointer_from_objref(ctx, svec_val), ctx.types().sizeof_ptr); + OperandBundleDef OpBundle("jl_roots", svec_val); + Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, svec_data, svec_len }, OpBundle); + *ret = mark_julia_type(ctx, r, true, jl_any_type); + return true; + } } else if (f == BUILTIN(tuple)) { @@ -4100,6 +4117,27 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } } + else if (f == BUILTIN(svec)) { + if (nargs == 0) { + *ret = mark_julia_const(ctx, (jl_value_t*)jl_emptysvec); + return true; + } + Value *svec = emit_allocobj(ctx, ctx.types().sizeof_ptr * (nargs + 1), ctx.builder.CreateIntToPtr(emit_tagfrom(ctx, jl_simplevector_type), ctx.types().T_pjlvalue), true, julia_alignment((jl_value_t*)jl_simplevector_type)); + Value *svec_derived = decay_derived(ctx, svec); + ctx.builder.CreateAlignedStore(ConstantInt::get(ctx.types().T_size, nargs), svec_derived, Align(ctx.types().sizeof_ptr)); + Value *svec_data = emit_ptrgep(ctx, svec_derived, ctx.types().sizeof_ptr); + ctx.builder.CreateMemSet(svec_data, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), ctx.types().sizeof_ptr * nargs, Align(ctx.types().sizeof_ptr)); + for (size_t i = 0; i < nargs; i++) { + Value *elem = boxed(ctx, argv[i + 1]); + Value *elem_ptr = emit_ptrgep(ctx, svec_derived, ctx.types().sizeof_ptr * (i + 1)); + auto *store = ctx.builder.CreateAlignedStore(elem, elem_ptr, Align(ctx.types().sizeof_ptr)); + store->setOrdering(AtomicOrdering::Release); + emit_write_barrier(ctx, svec, elem); + } + *ret = mark_julia_type(ctx, svec, true, jl_simplevector_type); + return true; + } + else if (f == BUILTIN(throw) && nargs == 1) { Value *arg1 = boxed(ctx, argv[1]); raise_exception(ctx, arg1); @@ -4599,6 +4637,20 @@ static bool emit_builtin_call(jl_codectx_t &ctx, 
jl_cgval_t *ret, jl_value_t *f, return emit_f_opfield(ctx, ret, f, argv, nargs, nullptr); } + else if (f == BUILTIN(_svec_len) && nargs == 1) { + const jl_cgval_t &obj = argv[1]; + Value *len; + if (obj.constant && jl_is_svec(obj.constant)) { + len = ConstantInt::get(ctx.types().T_size, jl_svec_len(obj.constant)); + } + else { + Value *svec_val = decay_derived(ctx, boxed(ctx, obj)); + len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, svec_val, Align(ctx.types().sizeof_ptr)); + } + *ret = mark_julia_type(ctx, len, false, jl_long_type); + return true; + } + else if (f == BUILTIN(nfields) && nargs == 1) { const jl_cgval_t &obj = argv[1]; if (ctx.vaSlot > 0) {