Actual source code: petsclandau.h

  1: #pragma once

  3: #include <petscdmplex.h>
  4: #include <petscts.h>

  6: /* MANSEC = TS */
  7: /* SUBMANSEC = LANDAU */

  9: PETSC_EXTERN PetscErrorCode DMPlexLandauPrintNorms(Vec, PetscInt);
 10: PETSC_EXTERN PetscErrorCode DMPlexLandauCreateVelocitySpace(MPI_Comm, PetscInt, const char[], Vec *, Mat *, DM *);
 11: PETSC_EXTERN PetscErrorCode DMPlexLandauDestroyVelocitySpace(DM *);
 12: PETSC_EXTERN PetscErrorCode DMPlexLandauAccess(DM, Vec, PetscErrorCode (*)(DM, Vec, PetscInt, PetscInt, PetscInt, void *), void *);
 13: PETSC_EXTERN PetscErrorCode DMPlexLandauAddMaxwellians(DM, Vec, PetscReal, PetscReal[], PetscReal[], PetscInt, PetscInt, PetscInt, void *);
 14: PETSC_EXTERN PetscErrorCode DMPlexLandauCreateMassMatrix(DM dm, Mat *Amat);
 15: PETSC_EXTERN PetscErrorCode DMPlexLandauIFunction(TS, PetscReal, Vec, Vec, Vec, void *);
 16: PETSC_EXTERN PetscErrorCode DMPlexLandauIJacobian(TS, PetscReal, Vec, Vec, PetscReal, Mat, Mat, void *);

 18: /*MC
 19:    LandauIdx - Integer type used to index entries in the `DMPLEX` Landau collision-operator data structures, such as the COO matrix workspaces and the `P4estVertexMaps` reduced-quadrature maps

 21:    Level: developer

 23:    Note:
 24:    `LandauIdx` is a `PetscInt`; it is named separately so the device-side data structures used by the Landau collision operator can be sized independently from the rest of PETSc if needed.

 26: .seealso: `DMPlexLandauCreateVelocitySpace()`, `LandauStaticData`, `LandauCtx`, `P4estVertexMaps`
 27: M*/
 28: typedef PetscInt LandauIdx;

 30: /* the Fokker-Planck-Landau context */
 31: #if !defined(LANDAU_MAX_SPECIES)
 32:   #if defined(PETSC_USE_DMLANDAU_2D)
 33:     #define LANDAU_MAX_SPECIES 10
 34:     #define LANDAU_MAX_GRIDS   3
 35:   #else
 36:     #define LANDAU_MAX_SPECIES 10
 37:     #define LANDAU_MAX_GRIDS   3
 38:   #endif
 39: #else
 40:   #define LANDAU_MAX_GRIDS 3
 41: #endif

 43: #if !defined(LANDAU_MAX_Q)
 44:   #if defined(LANDAU_MAX_NQND)
 45:     #error "LANDAU_MAX_NQND but not LANDAU_MAX_Q. Use -DLANDAU_MAX_Q=4 for Q3 elements"
 46:   #endif
 47:   #if defined(PETSC_USE_DMLANDAU_2D)
 48:     #define LANDAU_MAX_Q 6
 49:   #else
 50:     #define LANDAU_MAX_Q 6
 51:   #endif
 52: #else
 53:   #undef LANDAU_MAX_NQND
 54: #endif

 56: #if defined(PETSC_USE_DMLANDAU_2D)
 57:   #define LANDAU_MAX_Q_FACE   LANDAU_MAX_Q
 58:   #define LANDAU_MAX_NQND     (LANDAU_MAX_Q * LANDAU_MAX_Q)
 59:   #define LANDAU_MAX_BATCH_SZ 1024
 60:   #define LANDAU_DIM          2
 61: #else
 62:   #define LANDAU_MAX_Q_FACE   (LANDAU_MAX_Q * LANDAU_MAX_Q)
 63:   #define LANDAU_MAX_NQND     (LANDAU_MAX_Q * LANDAU_MAX_Q * LANDAU_MAX_Q)
 64:   #define LANDAU_MAX_BATCH_SZ 64
 65:   #define LANDAU_DIM          3
 66: #endif

 68: /*E
 69:    LandauDeviceType - Selects the backend used to evaluate the Landau collision-operator Jacobian and to hold its workspace data

 71:    Values:
 72: +   `LANDAU_KOKKOS` - run on the device with the Kokkos backend (requires PETSc configured `--with-kokkos`)
 73: -   `LANDAU_CPU`    - run on the host (default when Kokkos is not enabled)

 75:    Level: intermediate

 77: .seealso: `DMPlexLandauCreateVelocitySpace()`, `LandauCtx`, `LandauStaticData`
 78: E*/
 79: typedef enum {
 80:   LANDAU_KOKKOS,
 81:   LANDAU_CPU
 82: } LandauDeviceType;

 84: /*S
 85:    LandauStaticData - Workspace of pre-computed quadrature, geometry, and physics data that is shared by every Jacobian assembly of the `DMPLEX` Landau collision operator

 87:    Level: developer

 89:    Note:
 90:    The fields are typed as `void *` so that the same struct can hold either host arrays or device (Kokkos/CUDA/HIP) arrays depending on `LandauDeviceType`.
 91:    The contents are managed by `DMPlexLandauCreateVelocitySpace()` and friends and are not intended to be inspected by user code.

 93: .seealso: `DMPlexLandauCreateVelocitySpace()`, `LandauCtx`, `LandauDeviceType`, `LandauIdx`
 94: S*/
 95: typedef struct {
 96:   void *invJ;    // nip*dim*dim
 97:   void *D;       // nq*nb*dim
 98:   void *B;       // nq*nb
 99:   void *alpha;   // ns
100:   void *beta;    // ns
101:   void *invMass; // ns
102:   void *w;       // nip
103:   void *x;       // nip
104:   void *y;       // nip
105:   void *z;       // nip
106:   void *Eq_m;    // ns - dynamic
107:   void *f;       //  nip*Nf - dynamic (IP)
108:   void *dfdx;    // nip*Nf - dynamic (IP)
109:   void *dfdy;    // nip*Nf - dynamic (IP)
110:   void *dfdz;    // nip*Nf - dynamic (IP)
111:   int   dim_, ns_, nip_, nq_, nb_;
112:   void *NCells;         // remove and use elem_offset - TODO
113:   void *species_offset; // for each grid, but same for all batched vertices
114:   void *mat_offset;     // for each grid, but same for all batched vertices
115:   void *elem_offset;    // for each grid, but same for all batched vertices
116:   void *ip_offset;      // for each grid, but same for all batched vertices
117:   void *ipf_offset;     // for each grid, but same for all batched vertices
118:   void *ipfdf_data;     // for each grid, but same for all batched vertices
119:   void *maps;           // for each grid, but same for all batched vertices
120:   // COO
121:   void     *coo_elem_offsets;
122:   void     *coo_elem_point_offsets;
123:   void     *coo_elem_fullNb;
124:   void     *coo_vals;
125:   void     *lambdas;
126:   LandauIdx coo_n_cellsTot;
127:   LandauIdx coo_size;
128:   LandauIdx coo_max_fullnb;
129: } LandauStaticData;

131: /*E
132:    LandauOMPTimers - Identifiers for the timing slots kept in `LandauCtx.times[]` for the `DMPLEX` Landau collision operator

134:    Values:
135: +   `LANDAU_EX2_TSSOLVE`    - total `TSSolve()` time of the Landau example
136: .   `LANDAU_MATRIX_TOTAL`   - total time spent constructing the Jacobian and mass matrices
137: .   `LANDAU_OPERATOR`       - time inside the Landau operator evaluation
138: .   `LANDAU_JACOBIAN_COUNT` - number of Jacobian evaluations (stored as a count, reused as a timer slot)
139: .   `LANDAU_JACOBIAN`       - time inside Jacobian construction
140: .   `LANDAU_MASS`           - time inside mass-matrix construction
141: .   `LANDAU_F_DF`           - time evaluating the distribution function and its derivatives
142: .   `LANDAU_KERNEL`         - time inside the Landau collision kernel
143: .   `KSP_FACTOR`            - time inside the KSP factor stage when using a direct solver
144: .   `KSP_SOLVE`             - time inside `KSPSolve()`
145: -   `LANDAU_NUM_TIMERS`     - sentinel; equals the number of timer slots allocated in `LandauCtx`

147:    Level: developer

149: .seealso: `LandauCtx`, `DMPlexLandauCreateVelocitySpace()`
150: E*/
151: typedef enum {
152:   LANDAU_EX2_TSSOLVE,
153:   LANDAU_MATRIX_TOTAL,
154:   LANDAU_OPERATOR,
155:   LANDAU_JACOBIAN_COUNT,
156:   LANDAU_JACOBIAN,
157:   LANDAU_MASS,
158:   LANDAU_F_DF,
159:   LANDAU_KERNEL,
160:   KSP_FACTOR,
161:   KSP_SOLVE,
162:   LANDAU_NUM_TIMERS
163: } LandauOMPTimers;

165: /*S
166:    LandauCtx - Application context for the `DMPLEX` Landau collision operator that records the species data, mesh configuration, AMR settings, batching parameters, and pre-computed static data needed to evaluate the operator

168:    Level: intermediate

170:    Note:
171:    The context is created and managed by `DMPlexLandauCreateVelocitySpace()` and is attached to the returned `DM` as its application context. User code normally obtains it with `DMGetApplicationContext()` rather than constructing it directly.

173: .seealso: `DMPlexLandauCreateVelocitySpace()`, `DMPlexLandauDestroyVelocitySpace()`, `DMPlexLandauIFunction()`, `DMPlexLandauIJacobian()`,
174:           `LandauStaticData`, `LandauDeviceType`, `LandauOMPTimers`
175: S*/
176: typedef struct {
177:   PetscBool interpolate; /* Generate intermediate mesh elements */
178:   PetscBool gpu_assembly;
179:   MPI_Comm  comm; /* global communicator to use for errors and diagnostics */
180:   double    times[LANDAU_NUM_TIMERS];
181:   PetscBool use_matrix_mass;
182:   /* FE */
183:   PetscFE fe[LANDAU_MAX_SPECIES];
184:   /* geometry  */
185:   PetscReal radius[LANDAU_MAX_GRIDS];
186:   PetscReal radius_par[LANDAU_MAX_GRIDS];
187:   PetscReal radius_perp[LANDAU_MAX_GRIDS];
188:   PetscReal re_radius;      /* RE: radius of refinement along v_perp=0, z>0 */
189:   PetscReal vperp0_radius1; /* RE: radius of refinement along v_perp=0 */
190:   PetscReal vperp0_radius2; /* RE: radius of refinement along v_perp=0 after origin AMR refinement */
191:   PetscBool sphere;
192:   PetscBool map_sphere;
193:   PetscReal sphere_inner_radius_90degree[LANDAU_MAX_GRIDS];
194:   PetscReal sphere_inner_radius_45degree[LANDAU_MAX_GRIDS];
195:   PetscInt  cells0[3];
196:   /* AMR */
197:   PetscBool use_p4est;
198:   PetscInt  numRERefine;                     /* RE: refinement along v_perp=0, z > 0 */
199:   PetscInt  nZRefine1;                       /* RE: origin refinement after v_perp=0 refinement */
200:   PetscInt  nZRefine2;                       /* RE: origin refinement after origin AMR refinement */
201:   PetscInt  numAMRRefine[LANDAU_MAX_GRIDS];  /* normal AMR - refine from origin */
202:   PetscInt  postAMRRefine[LANDAU_MAX_GRIDS]; /* uniform refinement of AMR */
203:   PetscBool simplex;
204:   char      filename[PETSC_MAX_PATH_LEN];
205:   PetscReal thermal_speed[LANDAU_MAX_GRIDS];
206:   PetscBool sphere_uniform_normal;
207:   /* relativistic */
208:   PetscBool use_energy_tensor_trick;
209:   PetscBool use_relativistic_corrections;
210:   /* physics */
211:   PetscReal thermal_temps[LANDAU_MAX_SPECIES];
212:   PetscReal masses[LANDAU_MAX_SPECIES];  /* mass of each species  */
213:   PetscReal charges[LANDAU_MAX_SPECIES]; /* charge of each species  */
214:   PetscReal n[LANDAU_MAX_SPECIES];       /* number density of each species  */
215:   PetscReal m_0;                         /* reference mass */
216:   PetscReal v_0;                         /* reference velocity */
217:   PetscReal n_0;                         /* reference number density */
218:   PetscReal t_0;                         /* reference time */
219:   PetscReal Ez;
220:   PetscReal epsilon0;
221:   PetscReal k;
222:   PetscReal lambdas[LANDAU_MAX_GRIDS][LANDAU_MAX_GRIDS];
223:   PetscReal electronShift;
224:   PetscInt  num_species;
225:   PetscInt  num_grids;
226:   PetscInt  species_offset[LANDAU_MAX_GRIDS + 1]; // for each grid, but same for all batched vertices
227:   PetscInt  mat_offset[LANDAU_MAX_GRIDS + 1];     // for each grid, but same for all batched vertices
228:   // batching
229:   PetscBool  jacobian_field_major_order; // this could be a type but lets not get pedantic
230:   VecScatter plex_batch;
231:   Vec        work_vec;
232:   IS         batch_is;
233:   PetscErrorCode (*seqaij_mult)(Mat, Vec, Vec);
234:   PetscErrorCode (*seqaij_multtranspose)(Mat, Vec, Vec);
235:   PetscErrorCode (*seqaij_solve)(Mat, Vec, Vec);
236:   PetscErrorCode (*seqaij_getdiagonal)(Mat, Vec);
237:   /* COO */
238:   Mat J;
239:   Mat M;
240:   Vec X;
241:   /* derived type */
242:   void *data;
243:   /* computing */
244:   LandauDeviceType deviceType;
245:   DM               pack;
246:   DM               plex[LANDAU_MAX_GRIDS];
247:   LandauStaticData SData_d; /* static geometric data on device */
248:   /* diagnostics */
249:   PetscInt         verbose;
250:   PetscLogEvent    events[20];
251:   PetscLogStage    stage;
252:   PetscObjectState norm_state;
253:   PetscInt         batch_sz;
254:   PetscInt         batch_view_idx;
255: } LandauCtx;

257: #define LANDAU_SPECIES_MAJOR
258: #if !defined(LANDAU_SPECIES_MAJOR)
259:   #define LAND_PACK_IDX(_b, _g)                         (_b * ctx->num_grids + _g)
260:   #define LAND_MOFFSET(_b, _g, _nbch, _ngrid, _mat_off) (_b * _mat_off[_ngrid] + _mat_off[_g])
261: #else
262:   #define LAND_PACK_IDX(_b, _g)                         (_g * ctx->batch_sz + _b)
263:   #define LAND_MOFFSET(_b, _g, _nbch, _ngrid, _mat_off) (_nbch * _mat_off[_g] + _b * (_mat_off[_g + 1] - _mat_off[_g]))
264: #endif

266: /*S
267:    pointInterpolationP4est - One entry in the reduced quadrature-point map used by the `DMPLEX` Landau collision operator; pairs a global Landau matrix index with the weight that scales the contribution of the corresponding quadrature point

269:    Level: developer

271:    Note:
272:    These records are arrays inside `P4estVertexMaps` and describe how multiple coincident quadrature points produced by the p4est-based AMR mesh are combined into a single entry of the Landau Jacobian.

274: .seealso: `LandauIdx`, `LandauCtx`, `LandauStaticData`, `DMPlexLandauCreateVelocitySpace()`
275: S*/
276: typedef struct {
277:   PetscReal scale;
278:   LandauIdx gid; // Landau matrix index (<10,000)
279: } pointInterpolationP4est;
280: typedef struct _lP4estVertexMaps {
281:   LandauIdx (*gIdx)[LANDAU_MAX_SPECIES][LANDAU_MAX_NQND]; // #elems *  LANDAU_MAX_NQND
282:   LandauIdx        num_elements;
283:   LandauIdx        num_reduced;
284:   LandauIdx        num_face; // (Q or Q^2 for 3D)
285:   LandauDeviceType deviceType;
286:   PetscInt         Nf;
287:   pointInterpolationP4est (*c_maps)[LANDAU_MAX_Q_FACE];
288:   struct _lP4estVertexMaps *d_self;
289:   void                     *vp1, *vp2, *vp3;
290:   PetscInt                  numgrids;
291: } P4estVertexMaps;

293: #if defined(PETSC_HAVE_KOKKOS)
294: PETSC_EXTERN PetscErrorCode LandauKokkosJacobian(DM[], const PetscInt, const PetscInt, const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[], const PetscScalar[], const LandauStaticData *, const PetscReal, const PetscLogEvent[], const PetscInt[], const PetscInt[], Mat[], Mat);
295: PETSC_EXTERN PetscErrorCode LandauKokkosCreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt);
296: PETSC_EXTERN PetscErrorCode LandauKokkosDestroyMatMaps(P4estVertexMaps *, PetscInt);
297: PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataSet(DM, const PetscInt, const PetscInt, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], LandauStaticData *);
298: PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataClear(LandauStaticData *);
299: #endif