#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: varorder2d.c,v 1.8 2000/01/30 18:31:33 huangp Exp $";
#endif

#include "src/grid/gridimpl.h"         /*I "grid.h" I*/
#include "varorder2d.h"

#undef  __FUNCT__
#define __FUNCT__ "GridCreateVarOrdering_Triangular_2D"
/*
  GridCreateVarOrdering_Triangular_2D - Creates a VarOrdering for the nodes and fields described by a class map.

  Input Parameters:
    grid - Supplies the communicator, the constraint context, the total number of fields and the mesh
    map  - Supplies the node counts, the class of every node, the size of every class and the
           field/class incidence table

  Output Parameter:
    order - The newly created ordering

  What is filled in:
    offsets[]      - First variable of each node. Interior nodes are numbered by a running sum of class
                     sizes (shifted by firstVar[rank] when there is more than one process); entries past
                     numNodes are obtained from the other processes.
    firstVar[]     - Prefix sums of numLocVars over all processes, so firstVar[numProcs] == numVars.
    localOffsets[] - Only allocated when numProcs > 1: position of each ghost node counted from
                     numLocVars onward.
    localStart[][] - For every field in the map and every class, the offset of that field inside a node
                     of that class, or -1 when the field is absent from the class.

  Notes:
    The routine is collective on grid->comm (MPI_Allgather/Alltoall/Alltoallv are called).
    The map is composed with the ordering under the name "ClassMap".
    Objects allocated before a failing call are not released on the error paths.
*/
int GridCreateVarOrdering_Triangular_2D(Grid grid, FieldClassMap map, VarOrdering *order) {
  Mesh                  mesh;
  Partition             part;
  PetscConstraintObject constCtx        = grid->constraintCtx;
  int                   numFields       = map->numFields;
  int                  *fields          = map->fields;
  int                   numNodes        = map->numNodes;
  int                   numOverlapNodes = map->numOverlapNodes;
  int                   numGhostNodes   = map->numGhostNodes;
  int                   numClasses      = map->numClasses;
  int                 **fieldClasses    = map->fieldClasses;
  int                  *classes         = map->classes;
  int                  *classSizes      = map->classSizes;
  int                  *localOffsets;
  int                   numNewVars;
  VarOrdering           o;
  /* Ghost variable communication */
  int                  *ghostSendVars;    /* Number of ghost variables on a given processor interior to this domain */
  int                  *sumSendVars;      /* Prefix sums of ghostSendVars */
  int                  *ghostRecvVars;    /* Number of ghost variables on a given processor */
  int                  *sumRecvVars;      /* Prefix sums of ghostRecvVars */
  int                  *displs;           /* Running insertion position for each processor's block of ghost entries */
  int                   numSendGhostVars; /* The number of ghost variable offsets to send to other processors */
  int                  *sendGhostBuffer;  /* Recv: Global node numbers Send: Offsets of these nodes */
  int                   numProcs, rank;
  int                   proc, f, field, comp, node, locNode, gNode, nclass, var;
  int                   ierr;

  PetscFunctionBegin;
  /* Create the ordering */
  PetscHeaderCreate(o, _VarOrdering, int, VAR_ORDER_COOKIE, 0, "VarOrdering", grid->comm, VarOrderingDestroy, 0);
  PetscLogObjectCreate(o);
  ierr = PetscObjectCompose((PetscObject) o, "ClassMap", (PetscObject) map);                              CHKERRQ(ierr);

  /* Allocate memory */
  ierr = MPI_Comm_size(grid->comm, &numProcs);                                                            CHKERRQ(ierr);
  ierr = MPI_Comm_rank(grid->comm, &rank);                                                                CHKERRQ(ierr);
  ierr = GridGetNumFields(grid, &o->numTotalFields);                                                      CHKERRQ(ierr);
  ierr = PetscMalloc((numProcs+1)      * sizeof(int),   &o->firstVar);                                    CHKERRQ(ierr);
  ierr = PetscMalloc(numOverlapNodes   * sizeof(int),   &o->offsets);                                     CHKERRQ(ierr);
  ierr = PetscMalloc(o->numTotalFields * sizeof(int *), &o->localStart);                                  CHKERRQ(ierr);
  PetscLogObjectMemory(o, (numProcs+1 + numOverlapNodes + o->numTotalFields*numClasses)*sizeof(int) + o->numTotalFields*sizeof(int *));
  /* Fields that are not part of the map keep a null localStart row */
  ierr = PetscMemzero(o->localStart, o->numTotalFields * sizeof(int *));                                  CHKERRQ(ierr);
  o->numLocNewVars = 0;
  o->numNewVars    = 0;

  /* Setup domain variable numbering */
  /* offsets[] holds numOverlapNodes entries, so the interior nodes must fit inside it */
  if (numOverlapNodes < numNodes) SETERRQ(PETSC_ERR_PLIB, "Invalid node partition");
  /* Running sum of class sizes; written so that a process owning no nodes never indexes offsets[-1] */
  o->numLocVars = 0;
  for(node = 0; node < numNodes; node++) {
    o->offsets[node] = o->numLocVars;
    o->numLocVars   += classSizes[classes[node]];
  }
  if (map->isConstrained == PETSC_TRUE) {
    /* The first output slot of getsize receives the local number of new variables */
    ierr = (*constCtx->ops->getsize)(constCtx, &o->numLocNewVars, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL);
    CHKERRQ(ierr);
    o->numLocVars += o->numLocNewVars;
  }
  /* Gather every process' local size into firstVar[1..numProcs], then turn the sizes into prefix sums */
  ierr = MPI_Allgather(&o->numLocVars, 1, MPI_INT, &o->firstVar[1], 1, MPI_INT, o->comm);                 CHKERRQ(ierr);
  o->firstVar[0] = 0;
  for(proc = 1; proc <= numProcs; proc++)
    o->firstVar[proc] += o->firstVar[proc-1];
  o->numVars = o->firstVar[numProcs];
  if (map->isConstrained == PETSC_TRUE) {
    /* The second output slot of getsize receives the global count, which must equal the sum of the local counts */
    ierr = (*constCtx->ops->getsize)(constCtx, PETSC_NULL, &o->numNewVars, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL);
    CHKERRQ(ierr);
    ierr = MPI_Allreduce(&o->numLocNewVars, &numNewVars, 1, MPI_INT, MPI_SUM, o->comm);                   CHKERRQ(ierr);
    if (o->numNewVars != numNewVars)
      SETERRQ(PETSC_ERR_PLIB, "Invalid partition of new variables");
  }

  /* Initialize overlap size */
  o->numOverlapVars    = o->numLocVars;
  o->numOverlapNewVars = o->numLocNewVars;

  ierr = GridGetMesh(grid, &mesh);                                                                        CHKERRQ(ierr);
  ierr = MeshGetPartition(mesh, &part);                                                                   CHKERRQ(ierr);
  if (numProcs > 1) {
    /* Map local to global variable numbers */
    for(node = 0; node < numNodes; node++)
      o->offsets[node] += o->firstVar[rank];

    /* Initialize communication */
    ierr = PetscMalloc(numProcs * sizeof(int), &ghostSendVars);                                           CHKERRQ(ierr);
    ierr = PetscMalloc(numProcs * sizeof(int), &sumSendVars);                                             CHKERRQ(ierr);
    ierr = PetscMalloc(numProcs * sizeof(int), &ghostRecvVars);                                           CHKERRQ(ierr);
    ierr = PetscMalloc(numProcs * sizeof(int), &sumRecvVars);                                             CHKERRQ(ierr);
    ierr = PetscMalloc(numProcs * sizeof(int), &displs);                                                  CHKERRQ(ierr);
    ierr = PetscMemzero(ghostSendVars, numProcs * sizeof(int));                                           CHKERRQ(ierr);
    ierr = PetscMemzero(sumSendVars,   numProcs * sizeof(int));                                           CHKERRQ(ierr);
    ierr = PetscMemzero(ghostRecvVars, numProcs * sizeof(int));                                           CHKERRQ(ierr);
    ierr = PetscMemzero(sumRecvVars,   numProcs * sizeof(int));                                           CHKERRQ(ierr);
    ierr = PetscMemzero(displs,        numProcs * sizeof(int));                                           CHKERRQ(ierr);

    /* Get number of ghost variables to receive from each processor and size of blocks --
         we here assume that classes[] already has ghost node classes in it */
    for(node = 0; node < numGhostNodes; node++) {
      ierr = PartitionGhostToGlobalNodeIndex(part, node, &gNode, &proc);                                  CHKERRQ(ierr);
      nclass = classes[numNodes+node];
      ghostRecvVars[proc]++;
      o->numOverlapVars += classSizes[nclass];
    }

    /* Get number of constrained ghost variables to receive from each processor and size of blocks */
    if (map->isConstrained == PETSC_TRUE) {
      ierr = (*constCtx->ops->getsize)(constCtx, PETSC_NULL, PETSC_NULL, &o->numOverlapNewVars, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL);
      CHKERRQ(ierr);
    }
    /* Only the new variables beyond the local ones add to the overlap count */
    o->numOverlapVars += o->numOverlapNewVars - o->numLocNewVars;

    /* Get sizes of ghost variable blocks to send to each processor */
    ierr = MPI_Alltoall(ghostRecvVars, 1, MPI_INT, ghostSendVars, 1, MPI_INT, o->comm);                   CHKERRQ(ierr);

    /* Calculate offsets into the ghost variable receive array */
    for(proc = 1; proc < numProcs; proc++) {
      sumRecvVars[proc] = sumRecvVars[proc-1] + ghostRecvVars[proc-1];
      displs[proc]      = sumRecvVars[proc];
    }

    /* Calculate offsets into the ghost variable send array */
    for(proc = 1; proc < numProcs; proc++)
      sumSendVars[proc] = sumSendVars[proc-1] + ghostSendVars[proc-1];

    /* Send requests for ghost variable offsets to each processor */
    numSendGhostVars = sumSendVars[numProcs-1] + ghostSendVars[numProcs-1];
    ierr = PetscMalloc(numSendGhostVars * sizeof(int), &sendGhostBuffer);                                 CHKERRQ(ierr);
    /* The ghost section of offsets[] temporarily carries global node numbers, grouped by owning processor.
       NOTE(review): the replies land in this processor-grouped order, which matches the ghost node order
       only if ghost nodes are already sorted by owner -- confirm against the partition implementation */
    for(node = 0; node < numGhostNodes; node++) {
      ierr = PartitionGhostToGlobalNodeIndex(part, node, &gNode, &proc);                                  CHKERRQ(ierr);
      o->offsets[numNodes+(displs[proc]++)] = gNode;
    }
    ierr = MPI_Alltoallv(&o->offsets[numNodes],  ghostRecvVars, sumRecvVars, MPI_INT,
                         sendGhostBuffer,        ghostSendVars, sumSendVars, MPI_INT, o->comm);
    CHKERRQ(ierr);

    /* Send ghost variables offsets to each processor */
    /* Each requested global node number is replaced in place by the global offset of that node here */
    for(node = 0; node < numSendGhostVars; node++) {
      ierr = PartitionGlobalToLocalNodeIndex(part, sendGhostBuffer[node], &locNode);                      CHKERRQ(ierr);
      sendGhostBuffer[node] = o->offsets[locNode];
    }
    ierr = MPI_Alltoallv(sendGhostBuffer,       ghostSendVars, sumSendVars, MPI_INT,
                         &o->offsets[numNodes], ghostRecvVars, sumRecvVars, MPI_INT, o->comm);
    CHKERRQ(ierr);

    /* Cleanup */
    ierr = PetscFree(ghostSendVars);                                                                      CHKERRQ(ierr);
    ierr = PetscFree(sumSendVars);                                                                        CHKERRQ(ierr);
    ierr = PetscFree(ghostRecvVars);                                                                      CHKERRQ(ierr);
    ierr = PetscFree(sumRecvVars);                                                                        CHKERRQ(ierr);
    ierr = PetscFree(displs);                                                                             CHKERRQ(ierr);
    ierr = PetscFree(sendGhostBuffer);                                                                    CHKERRQ(ierr);

    /* We maintain local offsets for ghost variables, meaning the offsets after the last
       interior variable, rather than the offset of the given ghost variable in the global
       matrix. */
    ierr = PetscMalloc(numGhostNodes * sizeof(int), &o->localOffsets);                                    CHKERRQ(ierr);
    for(node = 0, var = o->numLocVars; node < numGhostNodes; node++) {
      o->localOffsets[node] = var;
      nclass = classes[numNodes+node];
      var   += classSizes[nclass];
    }
  }

  /* Allocate memory */
  /* localOffsets[c] accumulates the number of components already placed in a node of class c */
  ierr = PetscMalloc(numClasses * sizeof(int), &localOffsets);                                            CHKERRQ(ierr);
  ierr = PetscMemzero(localOffsets, numClasses * sizeof(int));                                            CHKERRQ(ierr);

  /* Setup local field offsets */
  for(f = 0; f < numFields; f++) {
    field = fields[f];
    ierr  = PetscMalloc(numClasses * sizeof(int), &o->localStart[field]);                                 CHKERRQ(ierr);
    for(nclass = 0; nclass < numClasses; nclass++) {
      if (fieldClasses[f][nclass]) {
        ierr = GridGetFieldComponents(grid, field, &comp);                                                CHKERRQ(ierr);
        o->localStart[field][nclass]  = localOffsets[nclass];
        localOffsets[nclass]         += comp;
      } else {
        /* Sentinel: the field has no variables on nodes of this class */
        o->localStart[field][nclass]  = -1;
      }
    }
  }
  *order = o;

  /* Cleanup */
  ierr = PetscFree(localOffsets);                                                                         CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GridVarOrderingConstrain_Triangular_2D"
/*
  GridVarOrderingConstrain_Triangular_2D - Creates the VarOrdering that corresponds to a constrained class map,
  reusing the per-class field offsets of an existing ordering.

  Input Parameters:
    grid       - Supplies the communicator, the mesh and the component count of constField
    constField - The constrained field; its component count is subtracted from the offsets of fields
                 stored after it in the classes introduced by the constraint
    constCtx   - Constraint context whose getsize operation reports the numbers of new variables
    constMap   - The constrained class map (node counts, classes, class sizes, class map)
    oldOrder   - The ordering before constraints; provides numTotalFields, localStart and its class map

  Output Parameter:
    order - The newly created ordering

  Notes:
    The routine is collective on grid->comm.
    constMap is composed with the new ordering under the name "ClassMap".
    The last entry of constMap->classMap is used as the old-class to new-class table.
    Objects allocated before a failing call are not released on the error paths.
*/
int GridVarOrderingConstrain_Triangular_2D(Grid grid, int constField, PetscConstraintObject constCtx,
                                           FieldClassMap constMap, VarOrdering oldOrder, VarOrdering *order)
{
  Mesh          mesh;
  Partition     part;
  int           numFields          = constMap->numFields;
  int          *fields             = constMap->fields;
  int           numNodes           = constMap->numNodes;
  int           numOverlapNodes    = constMap->numOverlapNodes;
  int           numGhostNodes      = constMap->numGhostNodes;
  int           numClasses         = constMap->numClasses;
  int          *classes            = constMap->classes;
  int          *classMap           = constMap->classMap[constMap->mapSize-1];
  int         **localStart         = oldOrder->localStart;
  int           numClassesOld;
  int           comp;
  FieldClassMap map;
  VarOrdering   o;
  /* Ghost variable communication */
  int          *ghostSendVars;    /* Number of ghost variables on a given processor interior to this domain */
  int          *sumSendVars;      /* Prefix sums of ghostSendVars */
  int          *ghostRecvVars;    /* Number of ghost variables on a given processor */
  int          *sumRecvVars;      /* Prefix sums of ghostRecvVars */
  int          *displs;           /* Running insertion position for each processor's block of ghost entries */
  int           numSendGhostVars; /* The number of ghost variable offsets to send to other processors */
  int          *sendGhostBuffer;  /* Recv: Global node numbers Send: Offsets of these nodes */
  int           numProcs, rank;
  int           proc, f, field, node, locNode, gNode, nclass, i, var;
  int           ierr;

  PetscFunctionBegin;
  /* Create the ordering */
  PetscHeaderCreate(o, _VarOrdering, int, VAR_ORDER_COOKIE, 0, "VarOrdering", grid->comm, VarOrderingDestroy, 0);
  PetscLogObjectCreate(o);
  ierr = PetscObjectCompose((PetscObject) o, "ClassMap", (PetscObject) constMap);                         CHKERRQ(ierr);

  /* Allocate memory */
  ierr = MPI_Comm_size(grid->comm, &numProcs);                                                            CHKERRQ(ierr);
  ierr = MPI_Comm_rank(grid->comm, &rank);                                                                CHKERRQ(ierr);
  ierr = GridGetFieldComponents(grid, constField, &comp);                                                 CHKERRQ(ierr);
  o->numTotalFields = oldOrder->numTotalFields;
  ierr = PetscMalloc((numProcs+1)      * sizeof(int),   &o->firstVar);                                    CHKERRQ(ierr);
  ierr = PetscMalloc(numOverlapNodes   * sizeof(int),   &o->offsets);                                     CHKERRQ(ierr);
  ierr = PetscMalloc(o->numTotalFields * sizeof(int *), &o->localStart);                                  CHKERRQ(ierr);
  PetscLogObjectMemory(o, (numProcs+1 + numOverlapNodes + o->numTotalFields*numClasses) * sizeof(int) +
                       o->numTotalFields * sizeof(int *));
  /* Fields that are not part of the map keep a null localStart row */
  ierr = PetscMemzero(o->localStart, o->numTotalFields * sizeof(int *));                                  CHKERRQ(ierr);

  /* Setup domain variable numbering */
  if (numOverlapNodes < numNodes) SETERRQ(PETSC_ERR_PLIB, "Invalid node partition");
  /* Running sum of class sizes; written so that a process owning no nodes never indexes offsets[-1] */
  o->numLocVars = 0;
  for(node = 0; node < numNodes; node++) {
    o->offsets[node] = o->numLocVars;
    o->numLocVars   += constMap->classSizes[classes[node]];
  }
  /* The first output slot of getsize receives the local number of new variables */
  ierr = (*constCtx->ops->getsize)(constCtx, &o->numLocNewVars, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL);
  CHKERRQ(ierr);
  o->numLocVars        += o->numLocNewVars;
  /* Gather every process' local size into firstVar[1..numProcs], then turn the sizes into prefix sums */
  ierr = MPI_Allgather(&o->numLocVars, 1, MPI_INT, &o->firstVar[1], 1, MPI_INT, o->comm);                 CHKERRQ(ierr);
  o->firstVar[0] = 0;
  for(proc = 1; proc <= numProcs; proc++)
    o->firstVar[proc] += o->firstVar[proc-1];
  o->numVars = o->firstVar[numProcs];
  /* Always record the global number of new variables so the field is set in every build;
     only the cross-check against the sum of the local counts is restricted to debug builds */
  ierr = (*constCtx->ops->getsize)(constCtx, PETSC_NULL, &o->numNewVars, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL);
  CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
  ierr = MPI_Allreduce(&o->numLocNewVars, &i, 1, MPI_INT, MPI_SUM, o->comm);                              CHKERRQ(ierr);
  if (o->numNewVars != i) SETERRQ(PETSC_ERR_PLIB, "Invalid partition of new variables");
#endif

  /* Initialize variable overlap */
  o->numOverlapVars    = o->numLocVars;
  o->numOverlapNewVars = o->numLocNewVars;

  ierr = GridGetMesh(grid, &mesh);                                                                        CHKERRQ(ierr);
  ierr = MeshGetPartition(mesh, &part);                                                                   CHKERRQ(ierr);
  if (numProcs > 1) {
    /* Map local to global variable numbers */
    for(node = 0; node < numNodes; node++)
      o->offsets[node] += o->firstVar[rank];

    /* Initialize communication */
    ierr = PetscMalloc(numProcs * sizeof(int), &ghostSendVars);                                           CHKERRQ(ierr);
    ierr = PetscMalloc(numProcs * sizeof(int), &sumSendVars);                                             CHKERRQ(ierr);
    ierr = PetscMalloc(numProcs * sizeof(int), &ghostRecvVars);                                           CHKERRQ(ierr);
    ierr = PetscMalloc(numProcs * sizeof(int), &sumRecvVars);                                             CHKERRQ(ierr);
    ierr = PetscMalloc(numProcs * sizeof(int), &displs);                                                  CHKERRQ(ierr);
    ierr = PetscMemzero(ghostSendVars, numProcs * sizeof(int));                                           CHKERRQ(ierr);
    ierr = PetscMemzero(sumSendVars,   numProcs * sizeof(int));                                           CHKERRQ(ierr);
    ierr = PetscMemzero(ghostRecvVars, numProcs * sizeof(int));                                           CHKERRQ(ierr);
    ierr = PetscMemzero(sumRecvVars,   numProcs * sizeof(int));                                           CHKERRQ(ierr);
    ierr = PetscMemzero(displs,        numProcs * sizeof(int));                                           CHKERRQ(ierr);

    /* Get number of ghost variables to receive from each processor and size of blocks --
         we here assume that classes[] already has ghost node classes in it */
    for(node = 0; node < numGhostNodes; node++) {
      ierr = PartitionGhostToGlobalNodeIndex(part, node, &gNode, &proc);                                  CHKERRQ(ierr);
      nclass = classes[numNodes+node];
      ghostRecvVars[proc]++;
      o->numOverlapVars += constMap->classSizes[nclass];
    }

    /* Get number of constrained ghost variables to receive from each processor and size of blocks */
    ierr = (*constCtx->ops->getsize)(constCtx, PETSC_NULL, PETSC_NULL, &o->numOverlapNewVars, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL);
    CHKERRQ(ierr);
    /* Only the new variables beyond the local ones add to the overlap count */
    o->numOverlapVars += o->numOverlapNewVars - o->numLocNewVars;

    /* Get sizes of ghost variable blocks to send to each processor */
    ierr = MPI_Alltoall(ghostRecvVars, 1, MPI_INT, ghostSendVars, 1, MPI_INT, o->comm);                   CHKERRQ(ierr);

    /* Calculate offsets into the ghost variable receive array */
    for(proc = 1; proc < numProcs; proc++) {
      sumRecvVars[proc] = sumRecvVars[proc-1] + ghostRecvVars[proc-1];
      displs[proc]      = sumRecvVars[proc];
    }

    /* Calculate offsets into the ghost variable send array */
    for(proc = 1; proc < numProcs; proc++)
      sumSendVars[proc] = sumSendVars[proc-1] + ghostSendVars[proc-1];

    /* Send requests for ghost variable offsets to each processor */
    numSendGhostVars = sumSendVars[numProcs-1] + ghostSendVars[numProcs-1];
    ierr = PetscMalloc(numSendGhostVars * sizeof(int), &sendGhostBuffer);                                 CHKERRQ(ierr);
    /* The ghost section of offsets[] temporarily carries global node numbers, grouped by owning processor.
       NOTE(review): the replies land in this processor-grouped order, which matches the ghost node order
       only if ghost nodes are already sorted by owner -- confirm against the partition implementation */
    for(node = 0; node < numGhostNodes; node++) {
      ierr = PartitionGhostToGlobalNodeIndex(part, node, &gNode, &proc);                                  CHKERRQ(ierr);
      o->offsets[numNodes+(displs[proc]++)] = gNode;
    }
    ierr = MPI_Alltoallv(&o->offsets[numNodes],  ghostRecvVars, sumRecvVars, MPI_INT,
                         sendGhostBuffer,        ghostSendVars, sumSendVars, MPI_INT, o->comm);
    CHKERRQ(ierr);

    /* Send ghost variables offsets to each processor */
    /* Each requested global node number is replaced in place by the global offset of that node here */
    for(node = 0; node < numSendGhostVars; node++) {
      ierr = PartitionGlobalToLocalNodeIndex(part, sendGhostBuffer[node], &locNode);                      CHKERRQ(ierr);
      sendGhostBuffer[node] = o->offsets[locNode];
    }
    ierr = MPI_Alltoallv(sendGhostBuffer,       ghostSendVars, sumSendVars, MPI_INT,
                         &o->offsets[numNodes], ghostRecvVars, sumRecvVars, MPI_INT, o->comm);
    CHKERRQ(ierr);

    /* Cleanup */
    ierr = PetscFree(ghostSendVars);                                                                      CHKERRQ(ierr);
    ierr = PetscFree(sumSendVars);                                                                        CHKERRQ(ierr);
    ierr = PetscFree(ghostRecvVars);                                                                      CHKERRQ(ierr);
    ierr = PetscFree(sumRecvVars);                                                                        CHKERRQ(ierr);
    ierr = PetscFree(displs);                                                                             CHKERRQ(ierr);
    ierr = PetscFree(sendGhostBuffer);                                                                    CHKERRQ(ierr);

    /* We maintain local offsets for ghost variables, meaning the offsets after the last
       interior variable, rather than the offset of the given ghost variable in the global
       matrix. */
    ierr = PetscMalloc(numGhostNodes * sizeof(int), &o->localOffsets);                                    CHKERRQ(ierr);
    for(node = 0, var = o->numLocVars; node < numGhostNodes; node++) {
      o->localOffsets[node] = var;
      nclass = classes[numNodes+node];
      var   += constMap->classSizes[nclass];
    }
  }

  /* Setup local field offsets */
  ierr = VarOrderingGetClassMap(oldOrder, &map);                                                          CHKERRQ(ierr);
  numClassesOld = map->numClasses;
  /* Each new localStart row holds numClasses entries and receives numClassesOld copied ones */
  if (numClassesOld > numClasses) SETERRQ(PETSC_ERR_PLIB, "Invalid class map");
  for(f = 0; f < numFields; f++) {
    field = fields[f];
    ierr  = PetscMalloc(numClasses * sizeof(int), &o->localStart[field]);                                 CHKERRQ(ierr);
    /* Classes that already existed keep their old field offsets */
    for(nclass = 0; nclass < numClassesOld; nclass++) {
      o->localStart[field][nclass] = localStart[field][nclass];
    }
    for(i = numClassesOld; i < numClasses; i++) {
      /* Invert the class map */
      for(nclass = 0; nclass < numClassesOld; nclass++) {
        if (classMap[nclass] == i) break;
      }
      if (nclass >= numClassesOld) SETERRQ(PETSC_ERR_PLIB, "Invalid class map");

      /* Subtract out the constrained fields */
      /* Fields stored after constField in the source class move forward by its component count */
      o->localStart[field][i]    = localStart[field][nclass];
      if (localStart[field][nclass] > localStart[constField][nclass])
        o->localStart[field][i] -= comp;
    }
  }

  *order = o;
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GridCreateVarScatter_Triangular_2D"
/*
  GridCreateVarScatter_Triangular_2D - Creates the VecScatter that moves the variables of an embedded
  ordering out of a vector laid out by a larger ordering.

  Input Parameters:
    grid       - Supplies the field component counts and the constraint context
    order      - The ordering of vec; provides offsets, localStart and numLocVars
    vec        - The source vector of the scatter
    embedOrder - The embedded ordering; provides numLocVars and, through its class map, the fields
    embedVec   - The destination vector of the scatter

  Output Parameter:
    scatter - The scatter from the selected entries of vec to entries 0..numLocVars(embedOrder)-1 of embedVec

  Notes:
    For every node of order's class map whose class has a positive size in the embedded class map, the
    indices of all components of every embedded field present on that class are collected. When the grid
    is constrained and slots remain, the last numNewLocVars indices of order are appended.
    embedClassSizes is indexed with classes taken from order's class map.
    NOTE(review): this presumes both class maps share their class numbering -- confirm with callers.
    PETSC_ERR_PLIB ("Inconsistent variable orderings") is raised when the collected count does not
    match the local size of embedOrder; the check is made before each write so the index buffer is
    never overrun.
*/
int GridCreateVarScatter_Triangular_2D(Grid grid, VarOrdering order, GVec vec, VarOrdering embedOrder, GVec embedVec,
                                       VecScatter *scatter)
{
  PetscConstraintObject constCtx = grid->constraintCtx;
  FieldClassMap         map, embedMap;
  int                   numEmbedFields, numNodes;
  int                  *embedFields, *classes, *embedClassSizes;
  int                   numLocVars, numEmbedLocVars, numNewLocVars;
  int                  *offsets, **localStart;
  IS                    is, embedIS;
  int                  *indices;
  PetscTruth            isConstrained;
  int                   node, nclass, f, field, comp, var, count;
  int                   ierr;

  PetscFunctionBegin;
  /* Retrieve orderings */
  ierr = VarOrderingGetClassMap(order,      &map);                                                       CHKERRQ(ierr);
  ierr = VarOrderingGetClassMap(embedOrder, &embedMap);                                                  CHKERRQ(ierr);
  numNodes        = map->numNodes;
  classes         = map->classes;
  numLocVars      = order->numLocVars;
  offsets         = order->offsets;
  localStart      = order->localStart;
  numEmbedFields  = embedMap->numFields;
  embedFields     = embedMap->fields;
  embedClassSizes = embedMap->classSizes;
  numEmbedLocVars = embedOrder->numLocVars;

  /* indices[k] is the position in vec of the k-th local variable of embedVec */
  ierr = PetscMalloc(numEmbedLocVars * sizeof(int), &indices);                                            CHKERRQ(ierr);
  for(node = 0, count = 0; node < numNodes; node++) {
    nclass = classes[node];
    if (embedClassSizes[nclass] > 0) {
      for(f = 0; f < numEmbedFields; f++) {
        field = embedFields[f];
        /* A negative start marks a field that has no variables on this class */
        if (localStart[field][nclass] >= 0) {
          ierr = GridGetFieldComponents(grid, field, &comp);                                             CHKERRQ(ierr);
          /* Refuse to write past the end of indices[] when the two orderings disagree */
          if (comp > numEmbedLocVars - count) {
            ierr = PetscFree(indices);                                                                   CHKERRQ(ierr);
            SETERRQ(PETSC_ERR_PLIB, "Inconsistent variable orderings");
          }
          for(var = 0; var < comp; var++)
            indices[count++] = offsets[node] + localStart[field][nclass] + var;
        }
      }
    }
  }
  /* Handle new variables */
  ierr = GridIsConstrained(grid, &isConstrained);                                                        CHKERRQ(ierr);
  if ((isConstrained == PETSC_TRUE) && (count < numEmbedLocVars)) {
    ierr = (*constCtx->ops->getsize)(constCtx, &numNewLocVars, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL);
    CHKERRQ(ierr);
    /* The remaining slots must be exactly the new variables */
    if (count != numEmbedLocVars - numNewLocVars) {
      ierr = PetscFree(indices);                                                                         CHKERRQ(ierr);
      SETERRQ(PETSC_ERR_PLIB, "Inconsistent variable orderings");
    }
    /* Fill in the embed variables */
    /* The new variables occupy the last numNewLocVars local positions of order */
    for(var = numNewLocVars; var > 0; var--)
      indices[count++] = numLocVars - var;
  }
  if (count != numEmbedLocVars) {
    ierr = PetscFree(indices);                                                                           CHKERRQ(ierr);
    SETERRQ(PETSC_ERR_PLIB, "Inconsistent variable orderings");
  }

  /* Create mappings */
  ierr = ISCreateGeneral(order->comm, numEmbedLocVars, indices, &is);                                    CHKERRQ(ierr);
  ierr = ISCreateStride(order->comm, numEmbedLocVars, 0, 1, &embedIS);                                   CHKERRQ(ierr);

  /* Create the scatter */
  ierr = VecScatterCreate(vec, is, embedVec, embedIS, scatter);                                          CHKERRQ(ierr);

  /* Cleanup */
  ierr = ISDestroy(is);                                                                                  CHKERRQ(ierr);
  ierr = ISDestroy(embedIS);                                                                             CHKERRQ(ierr);
  ierr = PetscFree(indices);                                                                             CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GridCreateLocalVarOrdering_Triangular_2D"
/*
  GridCreateLocalVarOrdering_Triangular_2D - Creates the element-level ordering for a list of fields.

  Input Parameters:
    grid      - Supplies the communicator, the total number of fields and each field's discretization
    numFields - The number of entries in fields
    fields    - The fields to order; each entry must lie in [0, grid->numFields)

  Output Parameter:
    order - The new ordering. Its fields array is a copy of the input list, elemStart[field] is the
            running sum of grid->fields[].disc->size over the listed fields that precede it (-1 for a
            field that is not listed), and elemSize is the total over all listed fields.

  Notes:
    PETSC_ERR_ARG_OUTOFRANGE is raised, before anything is allocated, when a listed field index is
    outside the grid's field range; elemStart and grid->fields are indexed by those values.
*/
int GridCreateLocalVarOrdering_Triangular_2D(Grid grid, int numFields, int *fields, LocalVarOrdering *order)
{
  LocalVarOrdering o;
  int              elemOffset;
  int              f, field;
  int              ierr;

  PetscFunctionBegin;
  /* Validate the field list first so that a bad index cannot write outside elemStart[] */
  for(f = 0; f < numFields; f++) {
    if ((fields[f] < 0) || (fields[f] >= grid->numFields)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE, "Invalid field number");
  }

  /* Create the ordering */
  PetscHeaderCreate(o, _LocalVarOrdering, int, VAR_ORDER_COOKIE, 0, "LocalVarOrdering", grid->comm, LocalVarOrderingDestroy, 0);
  PetscLogObjectCreate(o);

  /* Allocate memory */
  o->numFields = numFields;
  ierr = PetscMalloc(numFields    * sizeof(int), &o->fields);                                             CHKERRQ(ierr);
  ierr = PetscMalloc(grid->numFields * sizeof(int), &o->elemStart);                                       CHKERRQ(ierr);
  PetscLogObjectMemory(o, (numFields + grid->numFields) * sizeof(int));
  ierr = PetscMemcpy(o->fields, fields, numFields * sizeof(int));                                         CHKERRQ(ierr);

  /* Put in sentinel values */
  for(f = 0; f < grid->numFields; f++) {
    o->elemStart[f] = -1;
  }

  /* Setup the element offsets of the listed fields */
  for(f = 0, elemOffset = 0; f < numFields; f++) {
    field               = fields[f];
    o->elemStart[field] = elemOffset;
    elemOffset         += grid->fields[field].disc->size;
  }
  o->elemSize = elemOffset;
  *order = o;

  PetscFunctionReturn(0);
}
