diff --git a/Compiler/src/ssair/passes.jl b/Compiler/src/ssair/passes.jl index 46ed299167060..1db3b84afd882 100644 --- a/Compiler/src/ssair/passes.jl +++ b/Compiler/src/ssair/passes.jl @@ -872,6 +872,49 @@ function perform_lifting!(compact::IncrementalCompact, return Pair{Any, PhiNest}(stmt_val, PhiNest(visited_philikes, lifted_philikes, lifted_leaves, reverse_mapping, walker_callback)) end +function lift_apply_args!(compact::IncrementalCompact, idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice) + # Handle _apply_iterate calls: convert arguments to use `Core.svec`. The behavior of Core.svec (with boxing) better matches the ABI of codegen. + compact[idx] = nothing + for i in 4:length(stmt.args) # Skip the `_apply_iterate` callee, the iterate function, and f; start at the first iterated argument + arg = stmt.args[i] + arg_type = argextype(arg, compact) + svec_args = nothing + if isa(arg_type, DataType) && arg_type.name === Tuple.name + if isa(arg, SSAValue) + arg_stmt = compact[arg][:stmt] + if is_known_call(arg_stmt, Core.tuple, compact) + svec_args = copy(arg_stmt.args) + end + end + if svec_args === nothing + # Fallback path: generate getfield calls for tuple elements + tuple_length = length(arg_type.parameters) + if tuple_length > 0 && !isvarargtype(arg_type.parameters[tuple_length]) + svec_args = Vector{Any}(undef, tuple_length + 1) + for j in 1:tuple_length + getfield_call = Expr(:call, GlobalRef(Core, :getfield), arg, j) + getfield_type = arg_type.parameters[j] + inst = compact[SSAValue(idx)] + getfield_ssa = insert_node!(compact, SSAValue(idx), NewInstruction(getfield_call, getfield_type, NoCallInfo(), inst[:line], inst[:flag])) + svec_args[j + 1] = getfield_ssa + end + end + end + end + # Create Core.svec call if we have arguments + if svec_args !== nothing + svec_args[1] = GlobalRef(Core, :svec) + new_svec_call = Expr(:call) + new_svec_call.args = svec_args + inst = compact[SSAValue(idx)] + new_svec_ssa = insert_node!(compact, SSAValue(idx), NewInstruction(new_svec_call, SimpleVector, NoCallInfo(), inst[:line], 
inst[:flag])) + stmt.args[i] = new_svec_ssa + end + end + compact[idx] = stmt + nothing +end + function lift_svec_ref!(compact::IncrementalCompact, idx::Int, stmt::Expr) length(stmt.args) != 3 && return @@ -1375,6 +1418,9 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) compact[SSAValue(idx)] = (compact[enter_ssa][:stmt]::EnterNode).scope elseif isexpr(stmt, :new) refine_new_effects!(𝕃ₒ, compact, idx, stmt) + elseif is_known_call(stmt, Core._apply_iterate, compact) + length(stmt.args) >= 4 || continue + lift_apply_args!(compact, idx, stmt, 𝕃ₒ) end continue end diff --git a/Compiler/src/tfuncs.jl b/Compiler/src/tfuncs.jl index b49885aa46721..33c866c401811 100644 --- a/Compiler/src/tfuncs.jl +++ b/Compiler/src/tfuncs.jl @@ -585,6 +585,15 @@ end add_tfunc(nfields, 1, 1, nfields_tfunc, 1) add_tfunc(Core._expr, 1, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->Expr), 100) add_tfunc(svec, 0, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->SimpleVector), 20) + +@nospecs function _svec_len_tfunc(𝕃::AbstractLattice, s) + if isa(s, Const) && isa(s.val, SimpleVector) + return Const(length(s.val)) + end + return Int +end +add_tfunc(Core._svec_len, 1, 1, _svec_len_tfunc, 1) + @nospecs function _svec_ref_tfunc(𝕃::AbstractLattice, s, i) if isa(s, Const) && isa(i, Const) s, i = s.val, i.val @@ -1986,15 +1995,8 @@ function tuple_tfunc(𝕃::AbstractLattice, argtypes::Vector{Any}) # UnionAll context is missing around this. 
pop!(argtypes) end - all_are_const = true - for i in 1:length(argtypes) - if !isa(argtypes[i], Const) - all_are_const = false - break - end - end - if all_are_const - return Const(ntuple(i::Int->argtypes[i].val, length(argtypes))) + if is_all_const_arg(argtypes, 1) # repeated from builtin_tfunction for the benefit of callers that use this tfunc directly + return Const(tuple(collect_const_args(argtypes, 1)...)) end params = Vector{Any}(undef, length(argtypes)) anyinfo = false @@ -2359,6 +2361,9 @@ function _builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f::Builtin), argt elseif f === Core.compilerbarrier na == 2 || return false return compilerbarrier_nothrow(argtypes[1], nothing) + elseif f === Core._svec_len + na == 1 || return false + return _svec_len_tfunc(𝕃, argtypes[1]) isa Const elseif f === Core._svec_ref na == 2 || return false return _svec_ref_tfunc(𝕃, argtypes[1], argtypes[2]) isa Const @@ -2366,7 +2371,7 @@ function _builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f::Builtin), argt return false end -# known to be always effect-free (in particular nothrow) +# known to be always effect-free (in particular also nothrow) const _PURE_BUILTINS = Any[ tuple, svec, @@ -2395,6 +2400,8 @@ const _CONSISTENT_BUILTINS = Any[ donotdelete, memoryrefnew, memoryrefoffset, + Core._svec_len, + Core._svec_ref, ] # known to be effect-free (but not necessarily nothrow) @@ -2419,6 +2426,7 @@ const _EFFECT_FREE_BUILTINS = [ Core.throw_methoderror, getglobal, compilerbarrier, + Core._svec_len, Core._svec_ref, ] @@ -2453,6 +2461,7 @@ const _ARGMEM_BUILTINS = Any[ replacefield!, setfield!, swapfield!, + Core._svec_len, Core._svec_ref, ] @@ -2637,7 +2646,7 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argty else if contains_is(_CONSISTENT_BUILTINS, f) consistent = ALWAYS_TRUE - elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned || f === Core._svec_ref + elseif f === memoryrefget || f === memoryrefset! 
|| f === memoryref_isassigned || f === Core._svec_len || f === Core._svec_ref consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY elseif f === Core._typevar || f === Core.memorynew consistent = CONSISTENT_IF_NOTRETURNED @@ -2746,11 +2755,12 @@ end function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtypes::Vector{Any}, sv::Union{AbsIntState, Nothing}) 𝕃ᵢ = typeinf_lattice(interp) - if isa(f, IntrinsicFunction) - if is_pure_intrinsic_infer(f) && all(@nospecialize(a) -> isa(a, Const), argtypes) - argvals = anymap(@nospecialize(a) -> (a::Const).val, argtypes) + # Early constant evaluation for foldable builtins with all const args + if isa(f, IntrinsicFunction) ? is_pure_intrinsic_infer(f) : (f in _PURE_BUILTINS || (f in _CONSISTENT_BUILTINS && f in _EFFECT_FREE_BUILTINS)) + if is_all_const_arg(argtypes, 1) + argvals = collect_const_args(argtypes, 1) try - # unroll a few cases which have specialized codegen + # unroll a few common cases for better codegen if length(argvals) == 1 return Const(f(argvals[1])) elseif length(argvals) == 2 @@ -2764,6 +2774,8 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp return Bottom end end + end + if isa(f, IntrinsicFunction) iidx = Int(reinterpret(Int32, f)) + 1 if iidx < 0 || iidx > length(T_IFUNC) # unknown intrinsic @@ -2790,6 +2802,7 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp end tf = T_FFUNC_VAL[fidx] end + if hasvarargtype(argtypes) if length(argtypes) - 1 > tf[2] # definitely too many arguments diff --git a/Compiler/test/codegen.jl b/Compiler/test/codegen.jl index be2df190764a0..32c279da9bda6 100644 --- a/Compiler/test/codegen.jl +++ b/Compiler/test/codegen.jl @@ -133,14 +133,14 @@ if !is_debug_build && opt_level > 0 # Array test_loads_no_call(strip_debug_calls(get_llvm(sizeof, Tuple{Vector{Int}})), [Iptr]) # As long as the eltype is known we don't need to load the elsize, but do need to check isvector - @test_skip 
test_loads_no_call(strip_debug_calls(get_llvm(sizeof, Tuple{Array{Any}})), ["atomic $Iptr", "ptr", "ptr", Iptr, Iptr, "ptr", Iptr]) + @test_skip test_loads_no_call(strip_debug_calls(get_llvm(sizeof, Tuple{Array{Any}})), ["atomic volatile $Iptr", "ptr", "ptr", Iptr, Iptr, "ptr", Iptr]) # Memory test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory{Int}})), [Iptr]) # As long as the eltype is known we don't need to load the elsize test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory{Any}})), [Iptr]) # Check that we load the elsize and isunion from the typeof layout - test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory})), [Iptr, "atomic $Iptr", "ptr", "i32", "i16"]) - test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory})), [Iptr, "atomic $Iptr", "ptr", "i32", "i16"]) + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory})), [Iptr, "atomic volatile $Iptr", "ptr", "i32", "i16"]) + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory})), [Iptr, "atomic volatile $Iptr", "ptr", "i32", "i16"]) # Primitive Type size should be folded to a constant test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Ptr})), String[]) diff --git a/Compiler/test/effects.jl b/Compiler/test/effects.jl index 720825aa145f8..79d16602a8d92 100644 --- a/Compiler/test/effects.jl +++ b/Compiler/test/effects.jl @@ -1466,7 +1466,7 @@ end let effects = Base.infer_effects((Core.SimpleVector,Int); optimize=false) do svec, i Core._svec_ref(svec, i) end - @test !Compiler.is_consistent(effects) + @test Compiler.is_consistent(effects) @test Compiler.is_effect_free(effects) @test !Compiler.is_nothrow(effects) @test Compiler.is_terminates(effects) diff --git a/base/Base.jl b/base/Base.jl index f8905bf49ce79..5940f32d4bd71 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -174,6 +174,7 @@ using .Filesystem include("cmd.jl") include("process.jl") include("terminfo.jl") 
+include("Terminals.jl") # Moved from REPL to reduce invalidations include("secretbuffer.jl") # core math functions diff --git a/stdlib/REPL/src/Terminals.jl b/base/Terminals.jl similarity index 100% rename from stdlib/REPL/src/Terminals.jl rename to base/Terminals.jl diff --git a/base/essentials.jl b/base/essentials.jl index b817e53ec6c4d..412629411e36a 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -934,11 +934,7 @@ setindex!(A::MemoryRef{Any}, @nospecialize(x)) = (memoryrefset!(A, x, :not_atomi getindex(v::SimpleVector, i::Int) = (@_foldable_meta; Core._svec_ref(v, i)) function length(v::SimpleVector) - @_total_meta - t = @_gc_preserve_begin v - len = unsafe_load(Ptr{Int}(pointer_from_objref(v))) - @_gc_preserve_end t - return len + Core._svec_len(v) end firstindex(v::SimpleVector) = 1 lastindex(v::SimpleVector) = length(v) diff --git a/base/sort.jl b/base/sort.jl index 8254f56b3f952..db865150edf57 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -563,12 +563,15 @@ function _sort!(v::UnwrappableSubArray, a::SubArrayOptimization, o::Ordering, kw @getkw lo hi # @assert v.stride1 == 1 parent = v.parent - if parent isa Array && !(parent isa Vector) && hi - lo < 100 + if parent isa Array && !(parent isa Vector) && hi - lo < 100 || !iszero(v.offset1) # vec(::Array{T, ≠1}) allocates and is therefore somewhat expensive. # We don't want that for small inputs. + + # Additionally, if offset1 is non-zero, then this optimization is incompatible with + # algorithms that track absolute first and last indices (e.g. 
ScratchQuickSort) _sort!(v, a.next, o, kw) else - _sort!(vec(parent), a.next, o, (;kw..., lo = lo + v.offset1, hi = hi + v.offset1)) + _sort!(vec(parent), a.next, o, kw) end end diff --git a/base/stream.jl b/base/stream.jl index 5732a62c2153b..6103d4ff1bb31 100644 --- a/base/stream.jl +++ b/base/stream.jl @@ -378,7 +378,7 @@ end function isopen(x::Union{LibuvStream, LibuvServer}) if x.status == StatusUninit || x.status == StatusInit || x.handle === C_NULL - throw(ArgumentError("$x is not initialized")) + throw(ArgumentError("stream not initialized")) end return x.status != StatusClosed end diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index 45b4a9a485a4d..88972be56ab4d 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -34,6 +34,14 @@ hardcoded_precompile_statements = """ precompile(Base.unsafe_string, (Ptr{UInt8},)) precompile(Base.unsafe_string, (Ptr{Int8},)) +# used by REPL +precompile(Tuple{typeof(Base.getproperty), Base.Terminals.TTYTerminal, Symbol}) +precompile(Tuple{typeof(Base.reseteof), Base.Terminals.TTYTerminal}) +precompile(Tuple{typeof(Base.Terminals.enable_bracketed_paste), Base.Terminals.TTYTerminal}) +precompile(Tuple{typeof(Base.Terminals.width), Base.Terminals.TTYTerminal}) +precompile(Tuple{typeof(Base.Terminals.height), Base.Terminals.TTYTerminal}) +precompile(Tuple{typeof(Base.write), Base.Terminals.TTYTerminal, Array{UInt8, 1}}) + # loading.jl - without these each precompile worker would precompile these because they're hit before pkgimages are loaded precompile(Base.__require, (Module, Symbol)) precompile(Base.__require, (Base.PkgId,)) diff --git a/deps/libssh2.mk b/deps/libssh2.mk index 3f802db15be6d..661e26516c828 100644 --- a/deps/libssh2.mk +++ b/deps/libssh2.mk @@ -22,6 +22,7 @@ LIBSSH2_OPTS += -G"MSYS Makefiles" endif else LIBSSH2_OPTS += -DCRYPTO_BACKEND=OpenSSL -DENABLE_ZLIB_COMPRESSION=OFF +LIBSSH2_OPTS += -DOPENSSL_ROOT_DIR=$(build_prefix) endif ifneq (,$(findstring 
$(OS),Linux FreeBSD OpenBSD)) diff --git a/deps/openssl.mk b/deps/openssl.mk index 6f96717b2fb74..705303432c2c6 100644 --- a/deps/openssl.mk +++ b/deps/openssl.mk @@ -72,14 +72,21 @@ ifeq ($(OS),$(BUILD_OS)) endif echo 1 > $@ +# Override bindir and only install runtime libraries, otherwise they'll go into build_depsbindir. +OPENSSL_INSTALL = \ + mkdir -p $2$$(build_shlibdir) && \ + $$(MAKE) -C $1 install_dev $$(MAKE_COMMON) bindir=$$(build_shlibdir) $3 DESTDIR="$2" + +OPENSSL_POST_INSTALL := \ + $(WIN_MAKE_HARD_LINK) $(build_bindir)/libcrypto-*.dll $(build_bindir)/libcrypto.dll && \ + $(WIN_MAKE_HARD_LINK) $(build_bindir)/libssl-*.dll $(build_bindir)/libssl.dll && \ + $(INSTALL_NAME_CMD)libcrypto.$(SHLIB_EXT) $(build_shlibdir)/libcrypto.$(SHLIB_EXT) && \ + $(INSTALL_NAME_CMD)libssl.$(SHLIB_EXT) $(build_shlibdir)/libssl.$(SHLIB_EXT) && \ + $(INSTALL_NAME_CHANGE_CMD) $(build_shlibdir)/libcrypto.3.dylib @rpath/libcrypto.$(SHLIB_EXT) $(build_shlibdir)/libssl.$(SHLIB_EXT) + $(eval $(call staged-install, \ openssl,openssl-$(OPENSSL_VER), \ - MAKE_INSTALL,,, \ - $$(WIN_MAKE_HARD_LINK) $(build_bindir)/libcrypto-*.dll $(build_bindir)/libcrypto.dll && \ - $$(WIN_MAKE_HARD_LINK) $(build_bindir)/libssl-*.dll $(build_bindir)/libssl.dll && \ - $$(INSTALL_NAME_CMD)libcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libcrypto.$$(SHLIB_EXT) && \ - $$(INSTALL_NAME_CMD)libssl.$$(SHLIB_EXT) $$(build_shlibdir)/libssl.$$(SHLIB_EXT) && \ - $$(INSTALL_NAME_CHANGE_CMD) $$(build_shlibdir)/libcrypto.3.dylib @rpath/libcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libssl.$$(SHLIB_EXT))) + OPENSSL_INSTALL,,,$(OPENSSL_POST_INSTALL))) clean-openssl: -rm -f $(BUILDDIR)/-openssl-$(OPENSSL_VER)/build-configured $(BUILDDIR)/-openssl-$(OPENSSL_VER)/build-compiled diff --git a/doc/src/base/base.md b/doc/src/base/base.md index e6c8ff554d494..d274ec8f01721 100644 --- a/doc/src/base/base.md +++ b/doc/src/base/base.md @@ -112,14 +112,14 @@ where [] ``` -## Standard Modules +## [Standard Modules](@id standard-modules) 
```@docs Main Core Base ``` -## Base Submodules +## [Base Submodules](@id base-submodules) ```@docs Base.Broadcast Base.Docs diff --git a/doc/src/index.md b/doc/src/index.md index 8342ff448625d..7d781f25b4235 100644 --- a/doc/src/index.md +++ b/doc/src/index.md @@ -76,7 +76,7 @@ and [Ruby](https://en.wikipedia.org/wiki/Ruby_(programming_language)). The most significant departures of Julia from typical dynamic languages are: - * The core language imposes very little; Julia Base and the standard library are written in Julia itself, including + * The core language imposes very little; [Julia Base and the standard library](@ref man-core-base-and-stdlib) are written in Julia itself, including primitive operations like integer arithmetic * A rich language of types for constructing and describing objects, that can also optionally be used to make type declarations @@ -126,3 +126,40 @@ language. In addition to the above, some advantages of Julia over comparable sys * Call C functions directly (no wrappers or special APIs needed) * Powerful shell-like capabilities for managing other processes * Lisp-like macros and other metaprogramming facilities + +## [Julia Standard Modules and the Standard Library](@id man-standard-modules-stdlib) + +The Julia runtime comes with [standard modules](@ref standard-modules), +which are essential namespaces that are usually loaded automatically. + +```@docs; canonical=false +Core +Base +``` + +Julia's `Base` module contains various [useful submodules](@ref base-submodules). + +### [The Standard Library](@id man-stdlib) + +The Julia standard library contains additional, commonly used packages that are installed alongside the Julia runtime by default. +To use a standard library package, it is first necessary to load the package with a [`using`](@ref) or [`import`](@ref) statement. +Links to available standard library packages are provided below, +and may also be found in the website sidebar. 
+Their source code is available in the `Sys.STDLIB` directory of a Julia installation. + +```@eval +import Markdown +list = sort(filter(x -> match(r"_jll$", x) === nothing, readdir(Sys.STDLIB))) +Markdown.parse(join("- [`" .* list .* "`](stdlib/" .* list .* ".html)", "\n")) +``` + +Julia also provides various standard, pre-built binary libraries +of established software that is written in other languages. +By convention, these packages have names that end with `_jll`. +The [`using`](@ref) statement can load symbol names from these binary libraries: + +```@eval +import Markdown +list = sort(filter(x -> match(r"_jll$", x) !== nothing, readdir(Sys.STDLIB))) +Markdown.parse(join("- [`" .* list .* "`](stdlib/" .* list .* ".html)", "\n")) +``` diff --git a/src/builtin_proto.h b/src/builtin_proto.h index 586d948f722c1..53414d190906c 100644 --- a/src/builtin_proto.h +++ b/src/builtin_proto.h @@ -19,6 +19,7 @@ extern "C" { XX(_primitivetype,"_primitivetype") \ XX(_setsuper,"_setsuper!") \ XX(_structtype,"_structtype") \ + XX(_svec_len,"_svec_len") \ XX(_svec_ref,"_svec_ref") \ XX(_typebody,"_typebody!") \ XX(_typevar,"_typevar") \ diff --git a/src/builtins.c b/src/builtins.c index 0ff1a1cf61491..c57a677198754 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -693,9 +693,15 @@ JL_CALLABLE(jl_f__apply_iterate) return (jl_value_t*)t; } } - else if (f == BUILTIN(tuple) && jl_is_tuple(args[1])) { - return args[1]; + else if (f == BUILTIN(tuple)) { + if (jl_is_tuple(args[1])) + return args[1]; + if (jl_is_svec(args[1])) + return jl_f_tuple(NULL, jl_svec_data(args[1]), jl_svec_len(args[1])); } + // optimization for `f(svec...)` + if (jl_is_svec(args[1])) + return jl_apply_generic(f, jl_svec_data(args[1]), jl_svec_len(args[1])); } // estimate how many real arguments we appear to have size_t precount = 1; @@ -2091,6 +2097,14 @@ JL_CALLABLE(jl_f__compute_sparams) return (jl_value_t*)env; } +JL_CALLABLE(jl_f__svec_len) +{ + JL_NARGS(_svec_len, 1, 1); + jl_svec_t *s = 
(jl_svec_t*)args[0]; + JL_TYPECHK(_svec_len, simplevector, (jl_value_t*)s); + return jl_box_long(jl_svec_len(s)); +} + JL_CALLABLE(jl_f__svec_ref) { JL_NARGS(_svec_ref, 2, 2); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 65fbd80c303b5..138fa54949cf8 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -2228,6 +2228,9 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j } Value *instr = nullptr; if (!isboxed && jl_is_genericmemoryref_type(jltype)) { + //We don't specify the stronger expected memory ordering here because of fears it may interfere with vectorization and other optimizations + //if (Order == AtomicOrdering::NotAtomic) + // Order = AtomicOrdering::Monotonic; // load these FCA as individual fields, so LLVM does not need to split them later Value *fld0 = ctx.builder.CreateStructGEP(elty, ptr, 0); LoadInst *load0 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(0), fld0, Align(alignment), false); @@ -2401,11 +2404,26 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, instr = load; } if (r) { - StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment)); - store->setOrdering(Order == AtomicOrdering::NotAtomic && isboxed ? 
AtomicOrdering::Release : Order); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); - ai.decorateInst(store); + if (false && !isboxed && Order == AtomicOrdering::NotAtomic && jl_is_genericmemoryref_type(jltype)) { + // if enabled, store these FCA as individual fields, so LLVM does not need to split them later and they can use release ordering + assert(r->getType() == ctx.types().T_jlgenericmemory); + Value *f1 = ctx.builder.CreateExtractValue(r, 0); + Value *f2 = ctx.builder.CreateExtractValue(r, 1); + static_assert(offsetof(jl_genericmemoryref_t, ptr_or_offset) == 0, "wrong field order"); + StoreInst *store = ctx.builder.CreateAlignedStore(f1, ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, ptr, 0), Align(alignment)); + store->setOrdering(AtomicOrdering::Release); + ai.decorateInst(store); + store = ctx.builder.CreateAlignedStore(f2, ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, ptr, 1), Align(alignment)); + store->setOrdering(AtomicOrdering::Release); + ai.decorateInst(store); + } + else { + StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment)); + store->setOrdering(Order == AtomicOrdering::NotAtomic && isboxed ? 
AtomicOrdering::Release : Order); + ai.decorateInst(store); + } } else { assert(Order == AtomicOrdering::NotAtomic && !isboxed && rhs.typ == jltype); @@ -4377,10 +4395,11 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg for (size_t i = nargs; i < nf; i++) { if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, strctinfo.tbaa); - ai.decorateInst(ctx.builder.CreateAlignedStore( + auto *store = ctx.builder.CreateAlignedStore( ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), emit_ptrgep(ctx, strct, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1), - Align(1))); + Align(1)); + ai.decorateInst(store); } } // TODO: verify that nargs <= nf (currently handled by front-end) diff --git a/src/codegen.cpp b/src/codegen.cpp index 6c4543fcf0049..3189171e9b30a 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -4072,21 +4072,38 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } } - else if ((f == BUILTIN(_apply_iterate) && nargs == 3) && ctx.vaSlot > 0) { + else if (f == BUILTIN(_apply_iterate) && nargs == 3) { // turn Core._apply_iterate(iter, f, Tuple) ==> f(Tuple...) 
using the jlcall calling convention if Tuple is the va allocation - if (LoadInst *load = dyn_cast_or_null<LoadInst>(argv[3].V)) { - if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) { - Value *theF = boxed(ctx, argv[2]); - Value *nva = emit_n_varargs(ctx); + if (ctx.vaSlot > 0) { + if (LoadInst *load = dyn_cast_or_null<LoadInst>(argv[3].V)) { + if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) { + Value *theF = boxed(ctx, argv[2]); + Value *nva = emit_n_varargs(ctx); #ifdef _P64 - nva = ctx.builder.CreateTrunc(nva, getInt32Ty(ctx.builder.getContext())); + nva = ctx.builder.CreateTrunc(nva, getInt32Ty(ctx.builder.getContext())); #endif - Value *theArgs = emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*)); - Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva }); - *ret = mark_julia_type(ctx, r, true, jl_any_type); - return true; + Value *theArgs = emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*)); + Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva }); + *ret = mark_julia_type(ctx, r, true, jl_any_type); + return true; + } } } + // optimization for _apply_iterate when there is one argument and it is a SimpleVector + const jl_cgval_t &arg = argv[3]; + if (arg.typ == (jl_value_t*)jl_simplevector_type) { + Value *theF = boxed(ctx, argv[2]); + Value *svec_val = boxed(ctx, arg); + Value *svec_len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, decay_derived(ctx, svec_val), Align(ctx.types().sizeof_ptr)); +#ifdef _P64 + svec_len = ctx.builder.CreateTrunc(svec_len, getInt32Ty(ctx.builder.getContext())); +#endif + Value *svec_data = emit_ptrgep(ctx, emit_pointer_from_objref(ctx, svec_val), ctx.types().sizeof_ptr); + OperandBundleDef OpBundle("jl_roots", svec_val); + Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, svec_data, svec_len }, OpBundle); + *ret = mark_julia_type(ctx, r, true, 
jl_any_type); + return true; + } } else if (f == BUILTIN(tuple)) { @@ -4100,6 +4117,27 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } } + else if (f == BUILTIN(svec)) { + if (nargs == 0) { + *ret = mark_julia_const(ctx, (jl_value_t*)jl_emptysvec); + return true; + } + Value *svec = emit_allocobj(ctx, ctx.types().sizeof_ptr * (nargs + 1), ctx.builder.CreateIntToPtr(emit_tagfrom(ctx, jl_simplevector_type), ctx.types().T_pjlvalue), true, julia_alignment((jl_value_t*)jl_simplevector_type)); + Value *svec_derived = decay_derived(ctx, svec); + ctx.builder.CreateAlignedStore(ConstantInt::get(ctx.types().T_size, nargs), svec_derived, Align(ctx.types().sizeof_ptr)); + Value *svec_data = emit_ptrgep(ctx, svec_derived, ctx.types().sizeof_ptr); + ctx.builder.CreateMemSet(svec_data, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), ctx.types().sizeof_ptr * nargs, Align(ctx.types().sizeof_ptr)); + for (size_t i = 0; i < nargs; i++) { + Value *elem = boxed(ctx, argv[i + 1]); + Value *elem_ptr = emit_ptrgep(ctx, svec_derived, ctx.types().sizeof_ptr * (i + 1)); + auto *store = ctx.builder.CreateAlignedStore(elem, elem_ptr, Align(ctx.types().sizeof_ptr)); + store->setOrdering(AtomicOrdering::Release); + emit_write_barrier(ctx, svec, elem); + } + *ret = mark_julia_type(ctx, svec, true, jl_simplevector_type); + return true; + } + else if (f == BUILTIN(throw) && nargs == 1) { Value *arg1 = boxed(ctx, argv[1]); raise_exception(ctx, arg1); @@ -4599,6 +4637,20 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, return emit_f_opfield(ctx, ret, f, argv, nargs, nullptr); } + else if (f == BUILTIN(_svec_len) && nargs == 1) { + const jl_cgval_t &obj = argv[1]; + Value *len; + if (obj.constant && jl_is_svec(obj.constant)) { + len = ConstantInt::get(ctx.types().T_size, jl_svec_len(obj.constant)); + } + else { + Value *svec_val = decay_derived(ctx, boxed(ctx, obj)); + len = 
ctx.builder.CreateAlignedLoad(ctx.types().T_size, svec_val, Align(ctx.types().sizeof_ptr)); + } + *ret = mark_julia_type(ctx, len, false, jl_long_type); + return true; + } + else if (f == BUILTIN(nfields) && nargs == 1) { const jl_cgval_t &obj = argv[1]; if (ctx.vaSlot > 0) { diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index dcbe0295c40ae..61420c7306de9 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -334,6 +334,8 @@ XX(jl_new_method_table) \ XX(jl_new_method_uninit) \ XX(jl_new_module) \ + XX(jl_new_opaque_closure_from_code_info) \ + XX(jl_new_opaque_closure_from_code_info_in_world) \ XX(jl_new_primitivetype) \ XX(jl_new_struct) \ XX(jl_new_structt) \ diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 25ec6959eddf1..1d262ff7968b0 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -1899,6 +1899,9 @@ Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value * auto &M = *builder.GetInsertBlock()->getModule(); LoadInst *load = builder.CreateAlignedLoad(T_size, addr, M.getDataLayout().getPointerABIAlignment(0), V->getName() + ".tag"); load->setOrdering(AtomicOrdering::Unordered); + // Mark as volatile to prevent optimizers from treating GC tag loads as constants + // since GC mark bits can change during runtime (issue #59547) + load->setVolatile(true); load->setMetadata(LLVMContext::MD_tbaa, tbaa_tag); MDBuilder MDB(load->getContext()); auto *NullInt = ConstantInt::get(T_size, 0); diff --git a/src/opaque_closure.c b/src/opaque_closure.c index 2e39d5965b45a..8561449216d00 100644 --- a/src/opaque_closure.c +++ b/src/opaque_closure.c @@ -28,7 +28,7 @@ JL_DLLEXPORT int jl_is_valid_oc_argtype(jl_tupletype_t *argt, jl_method_t *sourc } static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub, - jl_value_t *source_, jl_value_t *captures, int do_compile) + jl_value_t *source_, jl_value_t *captures, 
int do_compile, size_t world) { if (!jl_is_tuple_type((jl_value_t*)argt)) { jl_error("OpaqueClosure argument tuple must be a tuple type"); @@ -61,7 +61,6 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t } } jl_task_t *ct = jl_current_task; - size_t world = ct->world_age; jl_code_instance_t *ci = NULL; if (do_compile) { ci = jl_compile_method_internal(mi, world); @@ -140,13 +139,13 @@ jl_opaque_closure_t *jl_new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_ { jl_value_t *captures = jl_f_tuple(NULL, env, nenv); JL_GC_PUSH1(&captures); - jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, source_, captures, do_compile); + jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, source_, captures, do_compile, jl_current_task->world_age); JL_GC_POP(); return oc; } -JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub, - jl_module_t *mod, jl_code_info_t *ci, int lineno, jl_value_t *file, int nargs, int isva, jl_value_t *env, int do_compile, int isinferred) +JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info_in_world(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub, + jl_module_t *mod, jl_code_info_t *ci, int lineno, jl_value_t *file, int nargs, int isva, jl_value_t *env, int do_compile, int isinferred, size_t world) { jl_value_t *root = NULL, *sigtype = NULL; jl_code_instance_t *inst = NULL; @@ -156,7 +155,6 @@ JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tuplet root = jl_new_struct(jl_linenumbernode_type, root, file); jl_method_t *meth = jl_make_opaque_closure_method(mod, jl_nothing, nargs, root, ci, isva, isinferred); root = (jl_value_t*)meth; - size_t world = jl_current_task->world_age; // these are only legal in the current world since they are not in any tables jl_atomic_store_release(&meth->primary_world, world); @@ -172,11 +170,17 @@ JL_DLLEXPORT jl_opaque_closure_t 
*jl_new_opaque_closure_from_code_info(jl_tuplet jl_mi_cache_insert(mi, inst); } - jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, root, env, do_compile); + jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, root, env, do_compile, world); JL_GC_POP(); return oc; } +JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub, + jl_module_t *mod, jl_code_info_t *ci, int lineno, jl_value_t *file, int nargs, int isva, jl_value_t *env, int do_compile, int isinferred) +{ + return jl_new_opaque_closure_from_code_info_in_world(argt, rt_lb, rt_ub, mod, ci, lineno, file, nargs, isva, env, do_compile, isinferred, jl_current_task->world_age); +} + JL_CALLABLE(jl_new_opaque_closure_jlcall) { if (nargs < 5) diff --git a/src/threading.c b/src/threading.c index 655cf26c782b2..9f5c18fe53555 100644 --- a/src/threading.c +++ b/src/threading.c @@ -309,6 +309,8 @@ JL_DLLEXPORT uint64_t jl_get_ptls_rng(void) JL_NOTSAFEPOINT return jl_current_task->ptls->rngseed; } +typedef void (*unw_tls_ensure_func)(void) JL_NOTSAFEPOINT; + // get thread local rng JL_DLLEXPORT void jl_set_ptls_rng(uint64_t new_seed) JL_NOTSAFEPOINT { @@ -394,7 +396,10 @@ jl_ptls_t jl_init_threadtls(int16_t tid) #if !defined(_OS_WINDOWS_) && !defined(JL_DISABLE_LIBUNWIND) && !defined(LLVMLIBUNWIND) // ensures libunwind TLS space for this thread is allocated eagerly // to make unwinding async-signal-safe even when using thread local caches. 
- unw_ensure_tls(); + unw_tls_ensure_func jl_unw_ensure_tls = NULL; + jl_dlsym(jl_exe_handle, "unw_ensure_tls", (void**)&jl_unw_ensure_tls, 0); + if (jl_unw_ensure_tls) + jl_unw_ensure_tls(); #endif return ptls; diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index d0473d8f8d6f1..b0abb6e78f997 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -59,8 +59,7 @@ import Base: _displaysize(io::IO) = displaysize(io)::Tuple{Int,Int} -include("Terminals.jl") -using .Terminals +using Base.Terminals abstract type AbstractREPL end diff --git a/stdlib/Sockets/src/PipeServer.jl b/stdlib/Sockets/src/PipeServer.jl index 4a8965c8f0462..d0557e9c83c5f 100644 --- a/stdlib/Sockets/src/PipeServer.jl +++ b/stdlib/Sockets/src/PipeServer.jl @@ -86,7 +86,7 @@ function connect!(sock::PipeEndpoint, path::AbstractString) req = Libc.malloc(Base._sizeof_uv_connect) uv_req_set_data(req, C_NULL) ccall(:uv_pipe_connect, Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}), req, sock.handle, path, - @cfunction(uv_connectcb, Cvoid, (Ptr{Cvoid}, Cint))) + @cfunction(uv_connectcb_pipe, Cvoid, (Ptr{Cvoid}, Cint))) sock.status = StatusConnecting iolock_end() return sock diff --git a/stdlib/Sockets/src/Sockets.jl b/stdlib/Sockets/src/Sockets.jl index f9e0f2f88dd78..7214841aaa41a 100644 --- a/stdlib/Sockets/src/Sockets.jl +++ b/stdlib/Sockets/src/Sockets.jl @@ -456,7 +456,7 @@ function send(sock::UDPSocket, ipaddr::IPAddr, port::Integer, msg) finally Base.sigatomic_end() iolock_begin() - q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct) + q = ct.queue; q === nothing || Base.list_deletefirst!(q::Base.IntrusiveLinkedList{Task}, ct) if uv_req_data(uvw) != C_NULL # uvw is still alive, # so make sure we won't get spurious notifications later @@ -474,9 +474,19 @@ end #from `connect` -function uv_connectcb(conn::Ptr{Cvoid}, status::Cint) +function uv_connectcb_tcp(conn::Ptr{Cvoid}, status::Cint) hand = ccall(:jl_uv_connect_handle, Ptr{Cvoid}, 
(Ptr{Cvoid},), conn) - sock = @handle_as hand LibuvStream + sock = @handle_as hand TCPSocket + connectcb(conn, status, hand, sock) +end + +function uv_connectcb_pipe(conn::Ptr{Cvoid}, status::Cint) + hand = ccall(:jl_uv_connect_handle, Ptr{Cvoid}, (Ptr{Cvoid},), conn) + sock = @handle_as hand PipeEndpoint + connectcb(conn, status, hand, sock) +end + +function connectcb(conn::Ptr{Cvoid}, status::Cint, hand::Ptr{Cvoid}, sock::LibuvStream) lock(sock.cond) try if status >= 0 # success @@ -508,7 +518,7 @@ function connect!(sock::TCPSocket, host::Union{IPv4, IPv6}, port::Integer) end host_in = Ref(hton(host.host)) uv_error("connect", ccall(:jl_tcp_connect, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, UInt16, Ptr{Cvoid}, Cint), - sock, host_in, hton(UInt16(port)), @cfunction(uv_connectcb, Cvoid, (Ptr{Cvoid}, Cint)), + sock, host_in, hton(UInt16(port)), @cfunction(uv_connectcb_tcp, Cvoid, (Ptr{Cvoid}, Cint)), host isa IPv6)) sock.status = StatusConnecting iolock_end() diff --git a/stdlib/Sockets/src/addrinfo.jl b/stdlib/Sockets/src/addrinfo.jl index f5599b8623a0b..86fe4335e0d37 100644 --- a/stdlib/Sockets/src/addrinfo.jl +++ b/stdlib/Sockets/src/addrinfo.jl @@ -90,7 +90,7 @@ function getalladdrinfo(host::String) finally Base.sigatomic_end() iolock_begin() - q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct) + q = ct.queue; q === nothing || Base.list_deletefirst!(q::Base.IntrusiveLinkedList{Task}, ct) if uv_req_data(req) != C_NULL # req is still alive, # so make sure we don't get spurious notifications later @@ -223,7 +223,7 @@ function getnameinfo(address::Union{IPv4, IPv6}) finally Base.sigatomic_end() iolock_begin() - q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct) + q = ct.queue; q === nothing || Base.list_deletefirst!(q::Base.IntrusiveLinkedList{Task}, ct) if uv_req_data(req) != C_NULL # req is still alive, # so make sure we don't get spurious notifications later diff --git 
a/test/llvmpasses/gc-writebarrier-volatile.ll b/test/llvmpasses/gc-writebarrier-volatile.ll new file mode 100644 index 0000000000000..c4bfafdb670f8 --- /dev/null +++ b/test/llvmpasses/gc-writebarrier-volatile.ll @@ -0,0 +1,41 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC,gvn)' -S %s | FileCheck %s + +; Test for issue #59547: Ensure write barrier GC tag loads are volatile +; This test verifies that the LateLowerGCFrame pass marks GC tag loads as volatile +; to prevent GVN from incorrectly constant-folding them, which would eliminate +; necessary write barrier checks. + +@tag = external addrspace(10) global {}, align 16 + +declare void @julia.write_barrier({} addrspace(10)*, {} addrspace(10)*) +declare {}*** @julia.get_pgcstack() +declare {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) + +; Test that write barrier expansion produces volatile GC tag loads +; CHECK-LABEL: @test_writebarrier_volatile_tags +define {} addrspace(10)* @test_writebarrier_volatile_tags() { +top: + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** + %parent = call {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) + %child = call {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) + call void @julia.write_barrier({} addrspace(10)* %parent, {} addrspace(10)* %child) + ret {} addrspace(10)* %parent + +; The critical test: GC tag loads must be volatile to prevent constant folding +; CHECK: load atomic volatile i64, ptr {{.*}} unordered, align 8, {{.*}}!tbaa +; CHECK: and i64 {{.*}}, 3 +; CHECK: icmp eq i64 {{.*}}, 3 +; CHECK: br i1 {{.*}}, label %may_trigger_wb, label + +; CHECK: may_trigger_wb: +; CHECK: load atomic volatile i64, ptr {{.*}} unordered, align 8, {{.*}}!tbaa +; CHECK: and i64 {{.*}}, 1 +; CHECK: icmp eq 
i64 {{.*}}, 0 +; CHECK: br i1 {{.*}}, label %trigger_wb, label + +; CHECK: trigger_wb: +; CHECK: call void @ijl_gc_queue_root(ptr {{.*}}) +} diff --git a/test/sorting.jl b/test/sorting.jl index e16b30de5bfc8..a0bbc51a3f239 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -1122,6 +1122,11 @@ end end end +@testset "partialsort! for UnwrappableSubArray with non-zero offset on 1.11 (#59569)" begin + a = reshape(6000:-1:1, 1000, :) |> collect; + @test partialsort!(view(copy(a), :, 6), 500:501) == [500, 501] +end + # This testset is at the end of the file because it is slow. @testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128, diff --git a/test/trimming/Makefile b/test/trimming/Makefile index c3145765655e7..2f29292d10bb5 100644 --- a/test/trimming/Makefile +++ b/test/trimming/Makefile @@ -33,11 +33,14 @@ JULIAC_BUILDSCRIPT := $(shell $(JULIA) -e 'print(joinpath(Sys.BINDIR, Base.DATAR #============================================================================= -release: $(BIN)/hello$(EXE) $(BIN)/basic_jll$(EXE) +release: $(BIN)/hello$(EXE) $(BIN)/trimmability$(EXE) $(BIN)/basic_jll$(EXE) $(BIN)/hello-o.a: $(SRCDIR)/hello.jl $(JULIAC_BUILDSCRIPT) $(JULIA) -t 1 -J $(JULIA_LIBDIR)/julia/sys.$(SHLIB_EXT) --startup-file=no --history-file=no --output-o $@ --output-incremental=no --strip-ir --strip-metadata --experimental --trim $(JULIAC_BUILDSCRIPT) $< --output-exe true +$(BIN)/trimmability-o.a: $(SRCDIR)/trimmability.jl $(JULIAC_BUILDSCRIPT) + $(JULIA) -t 1 -J $(JULIA_LIBDIR)/julia/sys.$(SHLIB_EXT) --startup-file=no --history-file=no --output-o $@ --output-incremental=no --strip-ir --strip-metadata --experimental --trim $(JULIAC_BUILDSCRIPT) $< --output-exe true + $(BIN)/basic_jll-o.a: $(SRCDIR)/basic_jll.jl $(JULIAC_BUILDSCRIPT) $(JULIA) -t 1 -J $(JULIA_LIBDIR)/julia/sys.$(SHLIB_EXT) --startup-file=no --history-file=no --project=$(SRCDIR) -e "using Pkg; Pkg.instantiate()" $(JULIA) -t 1 -J $(JULIA_LIBDIR)/julia/sys.$(SHLIB_EXT) --startup-file=no 
--history-file=no --project=$(SRCDIR) --output-o $@ --output-incremental=no --strip-ir --strip-metadata --experimental --trim $(JULIAC_BUILDSCRIPT) $< --output-exe true @@ -45,14 +48,17 @@ $(BIN)/basic_jll-o.a: $(SRCDIR)/basic_jll.jl $(JULIAC_BUILDSCRIPT) $(BIN)/hello$(EXE): $(BIN)/hello-o.a $(CC) -o $@ $(WHOLE_ARCHIVE) $< $(NO_WHOLE_ARCHIVE) $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) +$(BIN)/trimmability$(EXE): $(BIN)/trimmability-o.a + $(CC) -o $@ $(WHOLE_ARCHIVE) $< $(NO_WHOLE_ARCHIVE) $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) + $(BIN)/basic_jll$(EXE): $(BIN)/basic_jll-o.a $(CC) -o $@ $(WHOLE_ARCHIVE) $< $(NO_WHOLE_ARCHIVE) $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) -check: $(BIN)/hello$(EXE) $(BIN)/basic_jll$(EXE) +check: $(BIN)/hello$(EXE) $(BIN)/trimmability$(EXE) $(BIN)/basic_jll$(EXE) $(JULIA) --depwarn=error $(SRCDIR)/trimming.jl $< clean: - -rm -f $(BIN)/hello$(EXE) $(BIN)/basic_jll$(EXE) $(BIN)/hello-o.a $(BIN)/basic_jll-o.a + -rm -f $(BIN)/hello$(EXE) $(BIN)/trimmability$(EXE) $(BIN)/basic_jll$(EXE) $(BIN)/hello-o.a $(BIN)/trimmability-o.a $(BIN)/basic_jll-o.a .PHONY: release clean check diff --git a/test/trimming/hello.jl b/test/trimming/hello.jl index bb2ca585f3662..620a55b171544 100644 --- a/test/trimming/hello.jl +++ b/test/trimming/hello.jl @@ -1,11 +1,6 @@ -world::String = "world!" 
-const str = OncePerProcess{String}() do - return "Hello, " * world -end +# Test that minimal executable size stays low function @main(args::Vector{String})::Cint - println(Core.stdout, str()) - println(Core.stdout, PROGRAM_FILE) - foreach(x->println(Core.stdout, x), args) + println(Core.stdout, "Hello, world!") return 0 end diff --git a/test/trimming/trimmability.jl b/test/trimming/trimmability.jl new file mode 100644 index 0000000000000..acba8244534ef --- /dev/null +++ b/test/trimming/trimmability.jl @@ -0,0 +1,54 @@ +# Test that various constructs support trimming + +using Sockets + +world::String = "world!" +const str = OncePerProcess{String}() do + return "Hello, " * world +end + +abstract type Shape end +struct Square <: Shape + side::Float64 +end +struct Circle <: Shape + radius::Float64 +end +area(s::Square) = s.side^2 +area(c::Circle) = pi*c.radius^2 + +sum_areas(v::Vector{Shape}) = sum(area, v) + +function @main(args::Vector{String})::Cint + println(Core.stdout, str()) + println(Core.stdout, PROGRAM_FILE) + foreach(x->println(Core.stdout, x), args) + + # broken on 1.12 + # + # # test map/mapreduce; should work but relies on inlining and other optimizations + # # test that you can dispatch to some number of concrete cases + # println(Core.stdout, sum_areas(Shape[Circle(1), Square(2)])) + + arr = rand(10) + sorted_arr = sort(arr) + tot = sum(sorted_arr) + tot = prod(sorted_arr) + a = any(x -> x > 0, sorted_arr) + b = all(x -> x >= 0, sorted_arr) + c = map(x -> x^2, sorted_arr) + d = mapreduce(x -> x^2, +, sorted_arr) + # e = reduce(xor, rand(Int, 10)) + + try + sock = connect("localhost", 4900) + if isopen(sock) + write(sock, "Hello") + flush(sock) + close(sock) + end + catch + end + + return 0 +end diff --git a/test/trimming/trimming.jl b/test/trimming/trimming.jl index 5d55ed62b03a8..d46ad3bb74e34 100644 --- a/test/trimming/trimming.jl +++ b/test/trimming/trimming.jl @@ -6,8 +6,11 @@ bindir = dirname(ARGS[1]) let exe_suffix = 
splitext(Base.julia_exename())[2] hello_exe = joinpath(bindir, "hello" * exe_suffix) - @test readchomp(`$hello_exe arg1 arg2`) == "Hello, world!\n$hello_exe\narg1\narg2" - @test filesize(hello_exe) < 2_000_000 + @test readchomp(`$hello_exe arg1 arg2`) == "Hello, world!" + @test filesize(hello_exe) < 1_900_000 + + trimmability_exe = joinpath(bindir, "trimmability" * exe_suffix) + @test readchomp(`$trimmability_exe arg1 arg2`) == "Hello, world!\n$trimmability_exe\narg1\narg2" basic_jll_exe = joinpath(bindir, "basic_jll" * exe_suffix) lines = split(readchomp(`$basic_jll_exe`), "\n")