? gnu/gcc/gcc/config/i386/t_stack Index: share/man/man1/gcc-local.1 =================================================================== RCS file: /cvs/src/share/man/man1/gcc-local.1,v retrieving revision 1.51 diff -u -p -r1.51 gcc-local.1 --- share/man/man1/gcc-local.1 14 Feb 2017 12:50:15 -0000 1.51 +++ share/man/man1/gcc-local.1 24 Apr 2017 12:21:55 -0000 @@ -160,6 +160,14 @@ which includes additional functions to b have local array definitions or have references to local frame addresses. .It +On amd64, +.Fl msave-args +can be passed to the compiler to have functions save their register +arguments on the stack, while maintaining compatibility with the +System V AMD64 ABI. +This enables tools and debuggers that understand this semantic to +trivially generate stack traces that include function arguments. +.It On the alpha, amd64, arm, hppa, i386, mips64, powerpc, sh and sparc64 architectures, .Nm gcc Index: gnu/gcc/gcc/dwarf2.h =================================================================== RCS file: /cvs/src/gnu/gcc/gcc/dwarf2.h,v retrieving revision 1.1.1.1 diff -u -p -r1.1.1.1 dwarf2.h --- gnu/gcc/gcc/dwarf2.h 15 Oct 2009 17:11:28 -0000 1.1.1.1 +++ gnu/gcc/gcc/dwarf2.h 24 Apr 2017 12:21:55 -0000 @@ -371,6 +371,8 @@ enum dwarf_attribute DW_AT_GNU_vector = 0x2107, /* VMS extensions. */ DW_AT_VMS_rtnbeg_pd_address = 0x2201, + /* Sun extension. */ + DW_AT_SUN_amd64_parmdump = 0x2224, /* UPC extension. */ DW_AT_upc_threads_scaled = 0x3210, /* PGI (STMicroelectronics) extensions. */ Index: gnu/gcc/gcc/dwarf2out.c =================================================================== RCS file: /cvs/src/gnu/gcc/gcc/dwarf2out.c,v retrieving revision 1.1.1.1 diff -u -p -r1.1.1.1 dwarf2out.c --- gnu/gcc/gcc/dwarf2out.c 15 Oct 2009 17:11:28 -0000 1.1.1.1 +++ gnu/gcc/gcc/dwarf2out.c 24 Apr 2017 12:21:55 -0000 @@ -11960,6 +11960,10 @@ gen_subprogram_die (tree decl, dw_die_re /* Add the calling convention attribute if requested. 
*/ add_calling_convention_attribute (subr_die, TREE_TYPE (decl)); +#ifdef TARGET_SAVE_ARGS + if (TARGET_SAVE_ARGS) + add_AT_flag (subr_die, DW_AT_SUN_amd64_parmdump, 1); +#endif } /* Generate a DIE to represent a declared data object. */ Index: gnu/gcc/gcc/config/i386/i386.c =================================================================== RCS file: /cvs/src/gnu/gcc/gcc/config/i386/i386.c,v retrieving revision 1.5 diff -u -p -r1.5 i386.c --- gnu/gcc/gcc/config/i386/i386.c 1 Dec 2015 15:18:29 -0000 1.5 +++ gnu/gcc/gcc/config/i386/i386.c 24 Apr 2017 12:21:55 -0000 @@ -997,6 +997,10 @@ struct stack_local_entry GTY(()) saved frame pointer if frame_pointer_needed <- HARD_FRAME_POINTER + [-msave-args] + + [padding0] + [saved regs] [padding1] \ @@ -1009,6 +1013,8 @@ struct stack_local_entry GTY(()) */ struct ix86_frame { + int nmsave_args; + int padding0; int nregs; int padding1; int va_arg_size; @@ -1169,6 +1175,8 @@ static void ix86_dwarf_handle_frame_unsp static void i386_solaris_elf_named_section (const char *, unsigned int, tree) ATTRIBUTE_UNUSED; +static int ix86_nsaved_args(void); + /* Register class used for passing given 64bit part of the argument. These represent classes as documented by the PS ABI, with the exception of SSESF, SSEDF classes, that are basically SSE class, just gcc will @@ -1369,6 +1377,8 @@ static section *x86_64_elf_select_sectio struct gcc_target targetm = TARGET_INITIALIZER; +static void pro_epilogue_adjust_stack (rtx, rtx, rtx, int); + /* The svr4 ABI for the i386 says that records and unions are returned in memory. */ @@ -1903,6 +1913,9 @@ override_options (void) target_flags |= MASK_NO_RED_ZONE; } + if (!TARGET_64BIT && TARGET_SAVE_ARGS) + error ("-msave-args makes no sense in the 32-bit mode"); + /* Validate -mpreferred-stack-boundary= value, or provide default. The default of 128 bits is for Pentium III's SSE __m128. We can't change it because of optimize_size. 
Otherwise, we can't mix object @@ -4798,7 +4811,7 @@ ix86_can_use_return_insn_p (void) return 0; ix86_compute_frame_layout (&frame); - return frame.to_allocate == 0 && frame.nregs == 0; + return frame.to_allocate == 0 && frame.nmsave_args == 0 && frame.nregs == 0; } /* Value should be nonzero if functions must have frame pointers. @@ -4818,6 +4831,9 @@ ix86_frame_pointer_required (void) if (SUBTARGET_FRAME_POINTER_REQUIRED) return 1; + if (TARGET_SAVE_ARGS) + return 1; + /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off the frame pointer by default. Turn it back on now if we've not got a leaf function. */ @@ -5114,6 +5130,7 @@ ix86_compute_frame_layout (struct ix86_f frame->local_size = size; frame->nregs = ix86_nsaved_regs (); + frame->nmsave_args = ix86_nsaved_args (); total_size = size; stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT; @@ -5155,6 +5172,11 @@ ix86_compute_frame_layout (struct ix86_f else frame->save_regs_using_mov = false; + if (TARGET_SAVE_ARGS) + { + cfun->machine->use_fast_prologue_epilogue = true; + frame->save_regs_using_mov = true; + } /* Skip return address and saved base pointer. */ offset = frame_pointer_needed ? 
UNITS_PER_WORD * 2 : UNITS_PER_WORD; @@ -5174,6 +5196,16 @@ ix86_compute_frame_layout (struct ix86_f if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT) stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT; + /* Argument save area */ + if (TARGET_SAVE_ARGS) + { + offset += frame->nmsave_args * UNITS_PER_WORD; + frame->padding0 = (frame->nmsave_args % 2) * UNITS_PER_WORD; + offset += frame->padding0; + } + else + frame->padding0 = 0; + /* Register save area */ offset += frame->nregs * UNITS_PER_WORD; @@ -5231,8 +5263,10 @@ ix86_compute_frame_layout (struct ix86_f (size + frame->padding1 + frame->padding2 + frame->outgoing_arguments_size + frame->va_arg_size); - if ((!frame->to_allocate && frame->nregs <= 1) - || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000)) + if (!TARGET_SAVE_ARGS + && ((!frame->to_allocate && frame->nregs <= 1) + || (TARGET_64BIT + && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))) frame->save_regs_using_mov = false; if (TARGET_RED_ZONE && current_function_sp_is_unchanging @@ -5241,7 +5275,11 @@ ix86_compute_frame_layout (struct ix86_f { frame->red_zone_size = frame->to_allocate; if (frame->save_regs_using_mov) - frame->red_zone_size += frame->nregs * UNITS_PER_WORD; + { + frame->red_zone_size + += (frame->nregs + frame->nmsave_args) * UNITS_PER_WORD; + frame->red_zone_size += frame->padding0; + } if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; } @@ -5250,6 +5288,8 @@ ix86_compute_frame_layout (struct ix86_f frame->to_allocate -= frame->red_zone_size; frame->stack_pointer_offset -= frame->red_zone_size; #if 0 + fprintf (stderr, "nmsave_args: %ld\n", (long)frame->nmsave_args); + fprintf (stderr, "padding0: %ld\n", (long)frame->padding0); fprintf (stderr, "nregs: %i\n", frame->nregs); fprintf (stderr, "size: %i\n", size); fprintf (stderr, "alignment1: %i\n", stack_alignment_needed); @@ -5273,6 +5313,22 @@ ix86_emit_save_regs (void) unsigned int 
regno; rtx insn; + if (TARGET_SAVE_ARGS) + { + int i; + int nsaved = ix86_nsaved_args (); + int start = cfun->returns_struct; + for (i = start; i < start + nsaved; i++) + { + regno = x86_64_int_parameter_registers[i]; + insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno))); + RTX_FRAME_RELATED_P (insn) = 1; + } + if (nsaved % 2 != 0) + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-UNITS_PER_WORD), -1); + } + for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; ) if (ix86_save_reg (regno, true)) { @@ -5298,6 +5354,25 @@ ix86_emit_save_regs_using_mov (rtx point RTX_FRAME_RELATED_P (insn) = 1; offset += UNITS_PER_WORD; } + + if (TARGET_SAVE_ARGS) + { + int i; + int nsaved = ix86_nsaved_args (); + int start = cfun->returns_struct; + if (nsaved % 2 != 0) + offset += UNITS_PER_WORD; + for (i = start + nsaved - 1; i >= start; i--) + { + regno = x86_64_int_parameter_registers[i]; + insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer), + Pmode, offset), + gen_rtx_REG (Pmode, regno)); + RTX_FRAME_RELATED_P (insn) = 1; + offset += UNITS_PER_WORD; + } + } + } /* Expand prologue or epilogue stack adjustment. @@ -5470,14 +5545,16 @@ ix86_expand_prologue (void) if (!frame.save_regs_using_mov) ix86_emit_save_regs (); else - allocate += frame.nregs * UNITS_PER_WORD; + allocate += (frame.nregs + frame.nmsave_args) * UNITS_PER_WORD + + frame.padding0; /* When using red zone we may start register saving before allocating the stack frame saving one cycle of the prologue. */ if (TARGET_RED_ZONE && frame.save_regs_using_mov) ix86_emit_save_regs_using_mov (frame_pointer_needed ? 
hard_frame_pointer_rtx : stack_pointer_rtx, - -frame.nregs * UNITS_PER_WORD); + -(frame.nregs + frame.nmsave_args) + * UNITS_PER_WORD - frame.padding0); if (allocate == 0) ; @@ -5514,7 +5591,8 @@ ix86_expand_prologue (void) t = plus_constant (hard_frame_pointer_rtx, allocate - frame.to_allocate - - frame.nregs * UNITS_PER_WORD); + - (frame.nregs + frame.nmsave_args) + * UNITS_PER_WORD - frame.padding0); else t = plus_constant (stack_pointer_rtx, allocate); emit_move_insn (eax, gen_rtx_MEM (SImode, t)); @@ -5523,11 +5601,13 @@ ix86_expand_prologue (void) if (frame.save_regs_using_mov && !TARGET_RED_ZONE) { - if (!frame_pointer_needed || !frame.to_allocate) + if (!TARGET_SAVE_ARGS && + (!frame_pointer_needed || !frame.to_allocate)) ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate); else ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx, - -frame.nregs * UNITS_PER_WORD); + -(frame.nregs + frame.nmsave_args) + * UNITS_PER_WORD - frame.padding0); } pic_reg_used = false; @@ -5611,10 +5691,11 @@ ix86_expand_epilogue (int style) must be taken for the normal return case of a function using eh_return: the eax and edx registers are marked as saved, but not restored along this path. 
*/ - offset = frame.nregs; + offset = frame.nregs + frame.nmsave_args; if (current_function_calls_eh_return && style != 2) offset -= 2; offset *= -UNITS_PER_WORD; + offset -= frame.padding0; /* If we're only restoring one register and sp is not valid then using a move instruction to restore the register since it's @@ -5670,14 +5751,16 @@ ix86_expand_epilogue (int style) { tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); tmp = plus_constant (tmp, (frame.to_allocate - + frame.nregs * UNITS_PER_WORD)); + + (frame.nregs + frame.nmsave_args) + * UNITS_PER_WORD + frame.padding0)); emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp)); } } else if (!frame_pointer_needed) pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (frame.to_allocate - + frame.nregs * UNITS_PER_WORD), + + (frame.nregs + frame.nmsave_args) + * UNITS_PER_WORD + frame.padding0), style); /* If not an i386, mov & pop is faster than "leave". */ else if (TARGET_USE_LEAVE || optimize_size @@ -5717,6 +5800,10 @@ ix86_expand_epilogue (int style) else emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno))); } + if (frame.nmsave_args) + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (frame.nmsave_args * UNITS_PER_WORD + + frame.padding0), style); if (frame_pointer_needed) { /* Leave results in shorter dependency chains on CPUs that are @@ -6176,6 +6263,18 @@ bool constant_address_p (rtx x) { return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1); +} + +/* Return number of arguments to be saved on the stack with + -msave-args. 
*/ + +static int +ix86_nsaved_args (void) +{ + if (TARGET_SAVE_ARGS) + return current_function_args_info.regno - cfun->returns_struct; + else + return 0; } /* Nonzero if the constant value X is a legitimate general operand Index: gnu/gcc/gcc/config/i386/i386.opt =================================================================== RCS file: /cvs/src/gnu/gcc/gcc/config/i386/i386.opt,v retrieving revision 1.1.1.1 diff -u -p -r1.1.1.1 i386.opt --- gnu/gcc/gcc/config/i386/i386.opt 15 Oct 2009 17:11:30 -0000 1.1.1.1 +++ gnu/gcc/gcc/config/i386/i386.opt 24 Apr 2017 12:21:55 -0000 @@ -221,6 +221,10 @@ mtls-direct-seg-refs Target Report Mask(TLS_DIRECT_SEG_REFS) Use direct references against %gs when accessing tls data +msave-args +Target Report Mask(SAVE_ARGS) +Save integer arguments on the stack at function entry + mtune= Target RejectNegative Joined Var(ix86_tune_string) Schedule code for given CPU Index: gnu/gcc/gcc/doc/invoke.texi =================================================================== RCS file: /cvs/src/gnu/gcc/gcc/doc/invoke.texi,v retrieving revision 1.3 diff -u -p -r1.3 invoke.texi --- gnu/gcc/gcc/doc/invoke.texi 14 Jan 2014 02:03:57 -0000 1.3 +++ gnu/gcc/gcc/doc/invoke.texi 24 Apr 2017 12:21:55 -0000 @@ -9806,6 +9806,10 @@ building of shared libraries are not sup Generate code for the large model: This model makes no assumptions about addresses and sizes of sections. Currently GCC does not implement this model. + +@item -msave-args +@opindex msave-args +Save integer arguments on the stack at function entry. @end table @node IA-64 Options