------------------------------------------------------------------------------
--                                                                          --
--                         GNAT COMPILER COMPONENTS                         --
--                                                                          --
--                             E X P _ D B U G                              --
--                                                                          --
--                                 S p e c                                  --
--                                                                          --
--                            $Revision: 1.18 $                             --
--                                                                          --
--          Copyright (C) 1996-1997 Free Software Foundation, Inc.          --
--                                                                          --
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
-- terms of the  GNU General Public License as published  by the Free Soft- --
-- ware  Foundation;  either version 2,  or (at your option) any later ver- --
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
-- for  more details.  You should have  received  a copy of the GNU General --
-- Public License  distributed with GNAT;  see file COPYING.  If not, write --
-- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
-- MA 02111-1307, USA.                                                      --
--                                                                          --
-- GNAT was originally developed  by the GNAT team at  New York University. --
-- It is now maintained by Ada Core Technologies Inc (http://www.gnat.com). --
--                                                                          --
------------------------------------------------------------------------------

--  Expand routines for generation of special declarations used by the
--  debugger. In accordance with the Dwarf 2.2 specification, certain
--  type names are encoded to provide information to the debugger.

with Types; use Types;
with Uintp; use Uintp;

package Exp_Dbug is

   --------------------------------------
   -- Basic Encoding of External Names --
   --------------------------------------

   --  In the absence of pragma Interface, entities declared at the global
   --  level in library level packages have names obtained by folding all
   --  letters to lower case, and replacing periods with two underscores.

   --  For library level procedures, the name is folded to all lower case
   --  letters, and the characters _ada_ are prepended (the primary reason
   --  for this is to avoid confusion with similarly named C procedures,
   --  and in particular with the C procedure Main).

   --  In the case of a set of overloaded subprograms in the same
   --  package, the names are serialized by adding the suffix:

   --    __nn  (two underscores)

   --  where nn is a serial number (1 for the first overloaded function,
   --  2 for the second, etc.)

   --  Operator names are encoded using the following encodings:

   --    Oabs       abs
   --    Oand       and
   --    Omod       mod
   --    Onot       not
   --    Oor        or
   --    Orem       rem
   --    Oxor       xor
   --    Oeq        =
   --    One        /=
   --    Olt        <
   --    Ole        <=
   --    Ogt        >
   --    Oge        >=
   --    Oadd       +
   --    Osubtract  -
   --    Oconcat    &
   --    Omultiply  *
   --    Odivide    /
   --    Oexpon     **

   --  These names are prefixed by the normal full qualification, and
   --  suffixed by the overloading identification. So for example, the
   --  second operator "=" defined in package Extra.Messages would
   --  have the name:

   --    extra__messages__Oeq__2

   --  For compatibility with older versions of GNAT on some machines,
   --  the debugger should allow the use of $ in place of two underscores
   --  for numbering overloaded entities.

   --------------------------------
   -- Handling of Numeric Values --
   --------------------------------

   --  All numeric values here are encoded as strings of decimal digits.
   --  Only integer values need to be encoded. A negative value is encoded
   --  as the corresponding positive value followed by a lower case m for
   --  minus to indicate that the value is negative (e.g. 2m for -2).

   -------------------------
   -- Type Name Encodings --
   -------------------------

   --  In the following typ is the name of the type as normally encoded by
   --  the debugger rules, i.e. a non-qualified name, all in lower case,
   --  with standard encoding of upper half and wide characters.

      -----------------------
      -- Fixed-Point Types --
      -----------------------

      --   Fixed-point types are encoded using a suffix that indicates the
      --   delta and small values. The actual type itself is a normal
      --   integer type.

      --     typ___XF_nn_dd
      --     typ___XF_nn_dd_nn_dd

      --   The first form is used when small = delta. The value of delta (and
      --   small) is given by the rational nn/dd, where nn and dd are decimal
      --   integers.
      --
      --   The second form is used if the small value is different from the
      --   delta. In this case, the first nn/dd rational value is for delta,
      --   and the second value is for small.

      --------------------
      -- Discrete Types --
      --------------------

      --   Discrete types are coded with a suffix indicating the range in
      --   the case where one or both of the bounds are discriminants or
      --   variable.

      --   Note: at the current time, we also encode static bounds if they
      --   do not match the natural machine type bounds, but this may be
      --   removed in the future, since it is redundant for most debug
      --   formats ???

      --     typ___XD
      --     typ___XDL_lowerbound
      --     typ___XDU_upperbound
      --     typ___XDLU_lowerbound__upperbound

      --   If a discrete type is a natural machine type (i.e. its bounds
      --   correspond in a natural manner to its size), then it is left
      --   unencoded. The above encoding forms are used when there is a
      --   constrained range that does not correspond to the size or that
      --   has discriminant references or other non-static bounds.

      --   The first form is used if both bounds are dynamic, in which case
      --   two constant objects are present whose names are typ__L and
      --   typ__U in the same scope as typ, and the values of these constants
      --   indicate the bounds. As far as the debugger is concerned, these
      --   are simply variables that can be accessed like any other variables.
      --   Note that in the enumeration case, these values correspond to the
      --   Enum_Rep values for the lower and upper bounds.

      --   The second form is used if the upper bound is dynamic, but the
      --   lower bound is either constant or depends on a discriminant of
      --   the record with which the type is associated. The upper bound
      --   is stored in a constant object of name typ__U as previously
      --   described, but the lower bound is encoded directly into the
      --   name as either a decimal integer, or as the discriminant name.

      --   The third form is similarly used if the lower bound is dynamic,
      --   but the upper bound is static or a discriminant reference.

      --   The fourth form is used if both bounds are discriminant references
      --   or static values, with the encoding first for the lower bound,
      --   then for the upper bound, as previously described.

      ------------------
      -- Biased Types --
      ------------------

      --   Only discrete types can be biased, and the fact that they are
      --   biased is indicated by a suffix of the form:

      --     typ___XB_lowerbound__upperbound

      --   Here lowerbound and upperbound are decimal integers, with the
      --   usual (postfix "m") encoding for negative numbers. Biased
      --   types are only possible where the bounds are static, and the
      --   values are represented as unsigned offsets from the lower
      --   bound given. For example:

      --     type Q is range 10 .. 15;
      --     for Q'Size use 3;

      --   The size clause forces values of type Q to be stored in biased
      --   form (e.g. 11 is represented by the bit pattern 001), and the
      --   encoded name of the type is q___XB_10__15.

      ----------------------------------------------
      -- Record Types with Variable-Length Fields --
      ----------------------------------------------

      --  If a record has at least one field whose length is not known
      --  at compile time, then the name of the record type has a suffix
      --  to indicate this:

      --    type___XV

      --  The debugging formats do not fully support these types, and indeed
      --  some formats simply generate no useful information at all for such
      --  types. In order to provide information for the debugger, gigi creates
      --  a parallel type in the same scope with the name:

      --    type___XVE

      --  The idea here is to provide all the needed information to interpret
      --  objects of the original type in the form of a "fixed up" type which
      --  is representable using the normal debugging information.

      --  There are two cases to be dealt with. First, some fields may have
      --  variable positions because they appear after variable-length fields.
      --  To deal with this, we encode *all* the field bit positions of the
      --  special ___XVE type in a non-standard manner.

      --  The idea is to encode not the position, but rather information
      --  that allows computing the position of a field from the position
      --  of the previous field. The algorithm for stepping from one field
      --  to another is as follows:

      --    1. Take the current bit position in the record, i.e. the first
      --       unused bit after the previous field, or zero if this is the
      --       first field of the record.

      --    2. If an alignment is given (see below), then round the current
      --       bit position up, if needed, to the next multiple of that
      --       alignment.

      --    3. If a bit offset is given (see below), then add the bit offset
      --       to the current bit position.

      --  The bit offset is encoded as the position of the field. A value
      --  of zero means that step 3 can be skipped (or zero added).

      --  The alignment if present is encoded in the field name of the
      --  record, which has a suffix:

      --    fieldname___XVAnn

      --  where the nn after the XVA indicates the alignment value in storage
      --  units. This encoding is present only if an alignment is present.

      --  Second, the variable-length fields themselves are represented by
      --  replacing the type by a special access type. The designated type
      --  of this access type is the original variable-length type, and the
      --  fact that this field has been transformed in this way is signalled
      --  by encoding the field name as:

      --    field___XVL

      --  where field is the original field name. If a field is both
      --  variable-length and also needs an alignment encoding, then the
      --  encodings are combined using:

      --    field___XVLnn

      --  Note: the reason that we change the type is so that the resulting
      --  type has no variable-length fields. At least some of the formats
      --  used for debugging information simply cannot tolerate variable-
      --  length fields, so the encoded information would get lost.

      --  As an example of this encoding, consider the declarations:

      --    type Q is array (1 .. V1) of Float;       -- alignment 4
      --    type R is array (1 .. V2) of Long_Float;  -- alignment 8

      --    type X is record
      --       A : Character;
      --       B : Float;
      --       C : String (1 .. V3);
      --       D : Float;
      --       E : Q;
      --       F : R;
      --       G : Float;
      --    end record;

      --  The encoded type looks like:

      --    type anonymousQ is access Q;
      --    type anonymousR is access R;

      --    type X___XVE is record
      --       A        : Character;               -- position contains 0
      --       B        : Float;                   -- position contains 24
      --       C___XVL  : access String (1 .. V3); -- position contains 0
      --       D___XVA4 : Float;                   -- position contains 0
      --       E___XVL  : anonymousQ;              -- position contains 0
      --       F___XVL8 : anonymousR;              -- position contains 0
      --       G        : Float;                   -- position contains 0
      --    end record;

      --  Note: the B field could also have been encoded by using a position
      --  of zero, and an alignment of 4, but in such a case, the coding by
      --  position is preferred (since it takes up less space). We have used
      --  the (illegal) notation access xxx as field types in the example
      --  above.

      --  Note: all discriminants always appear before any variable-length
      --  fields that depend on them. So they can be located independent
      --  of the variable-length field, using the standard procedure for
      --  computing positions described above.

      -----------------
      -- Array Types --
      -----------------

      --  It is assumed that the debugger can obtain the index subtypes for
      --  an array type. Given the full encoding of these types (see above
      --  description for the encoding of discrete types), this means that
      --  all necessary information for addressing arrays is available. In
      --  some debugging formats, some or all of the bounds information may
      --  be available redundantly, particularly in the fixed-point case,
      --  but this information can in any case be ignored by the debugger.

   function Get_Encoded_Type_Name (E : Entity_Id) return Boolean;
   --  Determine whether the type name for E needs to be encoded according
   --  to the type name encoding rules described above.
   --
   --  If encoding is needed, True is returned. In that case the suffix for
   --  the encoded name, not including the initial three underscores, is
   --  stored in Name_Buffer, the length of the name is stored in Name_Len,
   --  and an ASCII.NUL character is stored following the name.
   --
   --  If no encoding is required, False is returned, and the values in
   --  Name_Buffer and Name_Len are undefined.

   procedure Get_Encoded_Field_Name
     (E     : Entity_Id;
      Align : Nat;
      Var   : Int);
   --  Build the encoded name of a record field, following the field name
   --  encoding rules described above.
   --
   --  E is the field entity, whose Ekind is either E_Component or
   --  E_Discriminant. Align is the alignment requirement, or zero if no
   --  alignment is required. Var is 0 for a fixed-length field, and 1 for
   --  a variable-length field.
   --
   --  The resulting encoded name is stored in Name_Buffer, followed by a
   --  terminating NUL character. Name_Len is set to the length of the
   --  encoded name, omitting the terminating NUL.

   ---------------------------
   -- Packed Array Encoding --
   ---------------------------

   --  For every packed array, two types are created, and both appear in
   --  the debugging output.

   --    The original declared array type is a perfectly normal array type,
   --    and its index bounds indicate the original bounds of the array.

   --    The corresponding packed array type, which may be a modular type, or
   --    may be an array of bytes type (see Exp_Pakd for full details). This
   --    is the type that is actually used in the generated code and for
   --    debugging information for all objects of the packed type.

   --  The name of the corresponding packed array type is:

   --    ttt___XPnnn

   --  where
   --    ttt is the name of the original declared array
   --    nnn is the component size in bits (1-31)

   --  When the debugger sees that an object is of a type that is encoded
   --  in this manner, it can use the original type to determine the bounds,
   --  and the component size to determine the packing details. The packing
   --  details are documented in Exp_Pakd.

   function Make_Packed_Array_Type_Name
     (Typ   : Entity_Id;
      Csize : Uint)
      return  Name_Id;
   --  Create the name of the packed array type that corresponds to an
   --  original declared array type, using the ttt___XPnnn encoding
   --  described above. This function is used in Exp_Pakd.
   --
   --  Typ is the entity that provides the name ttt, and Csize is the
   --  component size that provides the nnn value. The created name is
   --  returned as the function result.

   --------------------------------------
   -- Pointers to Unconstrained Arrays --
   --------------------------------------

   --  There are two kinds of pointers to arrays. The debugger can tell
   --  which format is in use by the form of the type of the pointer.

   --    Fat Pointers

   --      Fat pointers are represented as a struct with two fields

   --        P_ARRAY is a pointer to the array type. The array type
   --        here will not have useful bounds, e.g. in the case of
   --        pointer to String, the index type will be Natural.

   --        P_BOUNDS is a pointer to a struct which has fields of the
   --        form

   --           LBn (n a decimal integer) lower bound of n'th dimension
   --           UBn (n a decimal integer) upper bound of n'th dimension

   --        The bounds may be any integral type. In the case of an
   --        enumeration type, Enum_Rep values are used.

   --    Thin Pointers

   --      Thin pointers are represented as a pointer to a structure
   --      with two fields. The field ARRAY contains the array value.
   --      The field BOUNDS is a struct containing the bounds as above.
   --      Note that this array field is typically a variable length
   --      array, and consequently the entire record structure will
   --      be encoded as previously described.

   -----------------------------
   -- Variant Record Encoding --
   -----------------------------

   --  The variant part of a variant record is encoded as a single field
   --  in the enclosing record, whose name is:

   --     discrim___XVN

   --  where discrim is the unqualified name of the variant. This field name
   --  is built by gigi (not by code in this unit). Note that in the case
   --  of an Unchecked_Union record, this discriminant will not appear in
   --  the record, and the debugger must proceed accordingly (basically it
   --  can treat this case as it would a C union).

   --  The type corresponding to this field that is generated in the
   --  debugging unit is a C union, in which each member of the union
   --  corresponds to one variant. The name of the union member is
   --  encoded to indicate the choices, and is a string given by the
   --  following grammar:

   --    union_name ::= {choice} | others_choice
   --    choice ::= simple_choice | range_choice
   --    simple_choice ::= S number
   --    range_choice  ::= R number T number
   --    number ::= {decimal_digit} [m]
   --    others_choice ::= O (upper case letter O)

   --  The m in a number indicates a negative value. As an example of this
   --  encoding scheme, the choice 1 .. 4 | 7 | -10 would be represented by

   --    R1T4S7S10m

   --  Note that in the case of enumeration values, the values used are the
   --  actual representation values in the case where an enumeration type
   --  has an enumeration representation spec (i.e. they are values that
   --  correspond to the use of the Enum_Rep attribute).

   --  If the variant appears in a record with at least one variable length
   --  field, for which a parallel type is constructed (the ___XVE type which
   --  is described above), then the parallel type is constructed as follows:

   --    If the variant part is at a variable position (i.e. it follows a
   --    previous variable-length component), then its bit position is encoded
   --    as -A, where A is the alignment requirement of the variant part in
   --    bits (this is the same treatment used for any field, i.e. the field
   --    whose name is discrim_XVA is treated like any other field).

   --    Within the variants themselves, bit positions are encoded from the
   --    start of the variant part as usual. This means that even if the
   --    entire variant part has a variable position, fields within the
   --    variant may have static positions, since they are just offsets
   --    from the start of the variant part.

   --    If a variant itself contains a variable-length component, then
   --    it is encoded using a name___XVL field with an access type as usual,
   --    and in this case, following fields in that variant will have the
   --    usual -A encoding for the bit position to indicate that their
   --    position (i.e. offset from the start of the variant part) is
   --    variable and must be computed.

   procedure Get_Variant_Encoding (V : Node_Id);
   --  Build the encoding string for a variant, following the union member
   --  name grammar described above. This procedure is called by Gigi, with
   --  V being the variant node.
   --
   --  The encoding string is returned in Name_Buffer, with the length of
   --  the string in Name_Len, and an ASCII.NUL character stored following
   --  the string.

end Exp_Dbug;
