From 4a240311baa8e14036f7fd0d2498a2eb8b8acdb2 Mon Sep 17 00:00:00 2001 From: hukumka Date: Wed, 30 Sep 2020 12:47:18 +0000 Subject: [PATCH 1/2] Implement opencl based analog for genArea This implementation is a proof of concept, and missing: + Layers past L_SHORE_16 + Support for different minecraft versions --- makefile | 11 +- ocl_generator.c | 615 ++++++++++++++++++++++ ocl_generator.h | 70 +++ ocl_kernels.cl | 1323 +++++++++++++++++++++++++++++++++++++++++++++++ ocl_tests.c | 104 ++++ 5 files changed, 2122 insertions(+), 1 deletion(-) create mode 100644 ocl_generator.c create mode 100644 ocl_generator.h create mode 100644 ocl_kernels.cl create mode 100644 ocl_tests.c diff --git a/makefile b/makefile index d0d10e34..f79f8ceb 100644 --- a/makefile +++ b/makefile @@ -1,6 +1,7 @@ CC = gcc AR = ar ARFLAGS = cr +OCL_FLAGS = -I. -L. -lOpenCL override LDFLAGS = -lm override CFLAGS += -Wall -fwrapv @@ -25,6 +26,9 @@ libcubiomes: CFLAGS += -fPIC libcubiomes: layers.o generator.o finders.o util.o $(AR) $(ARFLAGS) libcubiomes.a $^ +opencl: ocl_generator.o ocl_tests.o libcubiomes.a + $(CC) $(CFLAGS) -o ocl_tests $^ $(OCL_FLAGS) $(LDFLAGS) + find_compactbiomes: find_compactbiomes.o layers.o generator.o finders.o $(CC) -o $@ $^ $(LDFLAGS) @@ -37,7 +41,6 @@ find_quadhuts: find_quadhuts.o layers.o generator.o finders.o find_quadhuts.o: find_quadhuts.c $(CC) -c $(CFLAGS) $< - finders.o: finders.c finders.h $(CC) -c $(CFLAGS) $< @@ -50,6 +53,12 @@ layers.o: layers.c layers.h util.o: util.c util.h $(CC) -c $(CFLAGS) $< +ocl_generator.o: ocl_generator.c ocl_generator.h + $(CC) -c $(CFLAGS) $< $(OCL_FLAGS) + +ocl_tests.o: ocl_tests.c ocl_generator.h + $(CC) -c $(CFLAGS) $< $(OCL_FLAGS) + clean: $(RM) *.o libcubiomes.a find_quadhuts find_compactbiomes diff --git a/ocl_generator.c b/ocl_generator.c new file mode 100644 index 00000000..908af03c --- /dev/null +++ b/ocl_generator.c @@ -0,0 +1,615 @@ +#include "ocl_generator.h" +#include <stdio.h> + +static int LAYER_BUFFER[L_NUM] = {0}; +typedef cl_int 
(*kernel_runner)(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event); +typedef cl_int4 (*size_alteration)(cl_int4 dims); + +// Kernel runners are responsible for assigning buffers to layers. +// Each layer has an associated buffer id. Normally, as buffers linearly depend on each other, +// only a single buffer is needed (apart from the tmp buffer). +// But in some cases multiple layers depend on the same layer, and thus +// it is necessary to keep the data of an old layer while new layers are computed. +// +// For example: +// Layer `Deep Ocean` needs to stay alive until `River Init` is computed, +// so it is stored in buffer[1], while dependent layers such as `Biome` can live +// in buffer[0]. +// Layer `River Init` can then live in buffer[1], since +// `Biome Edge` is already occupying buffer[0], and the previous occupant of buffer[1] +// is no longer needed. +static kernel_runner KERNEL_RUNNERS[L_NUM] = {NULL}; +static size_alteration PARENT_SIZES[L_NUM] = {NULL}; +static size_alteration LAYER_WORK_DIMENTIONS[L_NUM] = {NULL}; + +cl_int4 zooming_layer(cl_int4 dims) { + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + int px = x >> 1; + int pz = z >> 1; + dims.s2 = ((x + w) >> 1) - px + 1; + dims.s3 = ((z + h) >> 1) - pz + 1; + dims.s0 = px; + dims.s1 = pz; + return dims; +} + +cl_int4 add_brim_layer(cl_int4 dims) { + dims.s0 -= 1; + dims.s1 -= 1; + dims.s2 += 2; + dims.s3 += 2; + return dims; +} + +cl_int4 zooming_brim_layer(cl_int4 dims) { + return zooming_layer(add_brim_layer(dims)); +} + +cl_int4 layer_size_identity(cl_int4 dims) { + return dims; +} + +static inline cl_int run_layer_kernel_with_dims(int kernel_id, size_t* sdims, cl_mem* in, cl_mem* out, struct GeneratorContext* context, cl_int layer, cl_int4 dims, const cl_event* prev, cl_event* event) { + cl_kernel kernel = context->kernels[kernel_id]; + cl_int err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &context->layersBuffer); + if (err < 0) return 
err; + err = clSetKernelArg(kernel, 1, sizeof(cl_int), &layer); + if (err < 0) return err; + err = clSetKernelArg(kernel, 2, sizeof(cl_int4), &dims); + if (err < 0) return err; + err = clSetKernelArg(kernel, 3, sizeof(cl_mem), in); + if (err < 0) return err; + err = clSetKernelArg(kernel, 4, sizeof(cl_mem), out); + if (err < 0) return err; + + return clEnqueueNDRangeKernel( + context->queue, kernel, + 3, NULL, sdims, NULL, + 1, prev, event + ); +} + +static inline cl_int run_layer_kernel(int kernel_id, cl_mem* in, cl_mem* out, struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + size_t sdims[3]; + cl_int4 parent_dims = LAYER_WORK_DIMENTIONS[layer](dims); + sdims[0] = parent_dims.s2; + sdims[1] = parent_dims.s3; + sdims[2] = seed_range; + return run_layer_kernel_with_dims(kernel_id, sdims, in, out, context, layer, dims, prev, event); +} + +static inline cl_int run_biome_layer_kernel(int kernel_id, cl_mem* in, cl_mem* out, struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + size_t sdims[3]; + cl_int4 parent_dims = LAYER_WORK_DIMENTIONS[layer](dims); + sdims[0] = parent_dims.s2; + sdims[1] = parent_dims.s3; + sdims[2] = seed_range; + cl_kernel kernel = context->kernels[kernel_id]; + cl_int err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &context->layersBuffer); + if (err < 0) return err; + err = clSetKernelArg(kernel, 1, sizeof(cl_int), &layer); + if (err < 0) return err; + err = clSetKernelArg(kernel, 2, sizeof(cl_int4), &dims); + if (err < 0) return err; + err = clSetKernelArg(kernel, 3, sizeof(cl_mem), &context->biomesBuffer); + if (err < 0) return err; + err = clSetKernelArg(kernel, 4, sizeof(cl_mem), in); + if (err < 0) return err; + err = clSetKernelArg(kernel, 5, sizeof(cl_mem), out); + if (err < 0) return err; + + return clEnqueueNDRangeKernel( + context->queue, kernel, + 3, NULL, sdims, NULL, + 1, prev, event + ); +} + 
+static inline void swap_buffers(cl_mem** a, cl_mem** b) { + cl_mem* tmp = *a; + *a = *b; + *b = tmp; +} + +// ============================================= +// KERNEL RUNNERS +// +cl_int island_4096_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_PRIMARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_int error = run_layer_kernel(KER_MAP_ISLAND, *in, *out, context, layer, dims, seed_range, prev, event); + swap_buffers(in, out); + return error; +} + +cl_int zoom_island_2048_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_PRIMARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_int error = run_layer_kernel(KER_MAP_ZOOM_ISLAND, *in, *out, context, layer, dims, seed_range, prev, event); + swap_buffers(in, out); + return error; +} + +cl_int add_island_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_PRIMARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_int error = run_layer_kernel(KER_MAP_ADD_ISLAND, *in, *out, context, layer, dims, seed_range, prev, event); + swap_buffers(in, out); + return error; +} + +cl_int zoom_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_PRIMARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_int error = run_layer_kernel(KER_MAP_ZOOM, *in, *out, context, layer, dims, seed_range, prev, event); + swap_buffers(in, out); + return error; +} + +// Same as zoom_runner, but user secondary buffer +cl_int zoom_snd_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, 
cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_SECONDARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_int error = run_layer_kernel(KER_MAP_ZOOM, *in, *out, context, layer, dims, seed_range, prev, event); + swap_buffers(in, out); + return error; +} + +// Zoom, then remove the brim again. +// The brim is added so that L_RIVER_INIT_256 and L_BIOME_256 require the same size of L_DEEP_OCEAN_256. +cl_int zoom_64_hills_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_SECONDARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_event e; + + cl_int4 d0 = zooming_brim_layer(dims); + size_t sdims[3] = {d0.s2, d0.s3, seed_range}; + cl_int err = run_layer_kernel_with_dims(KER_MAP_ZOOM, sdims, *in, *out, context, layer, add_brim_layer(dims), prev, &e); + swap_buffers(in, out); + if (err < 0) return err; + + sdims[0] = dims.s2; + sdims[1] = dims.s3; + sdims[2] = seed_range; + + cl_kernel kernel = context->kernels[KER_RM_BRIM]; + err = clSetKernelArg(kernel, 0, sizeof(cl_int4), &dims); + if (err < 0) return err; + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), *in); + if (err < 0) return err; + err = clSetKernelArg(kernel, 2, sizeof(cl_mem), *out); + if (err < 0) return err; + + err = clEnqueueNDRangeKernel( + context->queue, kernel, + 3, NULL, sdims, NULL, + 1, &e, event // removeBrim reads the zoom output, so it must wait on the zoom's event, not on `prev` + ); + swap_buffers(in, out); + return err; +} + +cl_int remove_ocean_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_PRIMARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_int error = run_layer_kernel(KER_REMOVE_OCEAN, *in, *out, context, layer, dims, seed_range, prev, event); + swap_buffers(in, out); + return error; +} + +cl_int add_snow_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const 
cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_PRIMARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_int error = run_layer_kernel(KER_ADD_SNOW, *in, *out, context, layer, dims, seed_range, prev, event); + swap_buffers(in, out); + return error; +} + +cl_int cool_warm_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_PRIMARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_int error = run_layer_kernel(KER_COOL_WARM, *in, *out, context, layer, dims, seed_range, prev, event); + swap_buffers(in, out); + return error; +} + +cl_int heat_ice_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_PRIMARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_int error = run_layer_kernel(KER_HEAT_ICE, *in, *out, context, layer, dims, seed_range, prev, event); + swap_buffers(in, out); + return error; +} + +cl_int special_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_PRIMARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_int error = run_layer_kernel(KER_SPECIAL, *in, *out, context, layer, dims, seed_range, prev, event); + swap_buffers(in, out); + return error; +} + +cl_int add_mashroom_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_PRIMARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_int error = run_layer_kernel(KER_ADD_MUSHROOM, *in, *out, context, layer, dims, seed_range, prev, event); + swap_buffers(in, out); + return error; +} + +cl_int deep_ocean_runner(struct GeneratorContext* context, cl_int layer, cl_int4 
dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_PRIMARY]; + cl_mem** out = &context->buffer_layout[BUF_SECONDARY]; + cl_int error = run_layer_kernel(KER_DEEP_OCEAN, *in, *out, context, layer, dims, seed_range, prev, event); + return error; +} + +cl_int biomes_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_SECONDARY]; + cl_mem** out = &context->buffer_layout[BUF_PRIMARY]; + cl_int error = run_biome_layer_kernel(KER_BIOMES, *in, *out, context, layer, dims, seed_range, prev, event); + return error; +} + +cl_int add_bamboo_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_PRIMARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_int error = run_layer_kernel(KER_ADD_BAMBOO, *in, *out, context, layer, dims, seed_range, prev, event); + swap_buffers(in, out); + return error; +} + +cl_int biome_edge_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_PRIMARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_int error = run_biome_layer_kernel(KER_BIOME_EDGE, *in, *out, context, layer, dims, seed_range, prev, event); + swap_buffers(in, out); + return error; +} + +cl_int river_init_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_SECONDARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_int error = run_layer_kernel(KER_RIVER_INIT, *in, *out, context, layer, dims, seed_range, prev, event); + swap_buffers(in, out); + return error; +} + +cl_int hills_runner(struct GeneratorContext* context, cl_int layer, 
cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in1 = &context->buffer_layout[BUF_PRIMARY]; + cl_mem** in2 = &context->buffer_layout[BUF_SECONDARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + + size_t sdims[3]; + cl_int4 parent_dims = LAYER_WORK_DIMENTIONS[layer](dims); + sdims[0] = parent_dims.s2; + sdims[1] = parent_dims.s3; + sdims[2] = seed_range; + cl_kernel kernel = context->kernels[KER_HILLS]; + cl_int err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &context->layersBuffer); + if (err < 0) return err; + err = clSetKernelArg(kernel, 1, sizeof(cl_int), &layer); + if (err < 0) return err; + err = clSetKernelArg(kernel, 2, sizeof(cl_int4), &dims); + if (err < 0) return err; + err = clSetKernelArg(kernel, 3, sizeof(cl_mem), &context->biomesBuffer); + if (err < 0) return err; + err = clSetKernelArg(kernel, 4, sizeof(cl_mem), *in1); + if (err < 0) return err; + err = clSetKernelArg(kernel, 5, sizeof(cl_mem), *in2); + if (err < 0) return err; + err = clSetKernelArg(kernel, 6, sizeof(cl_mem), *out); + if (err < 0) return err; + + err = clEnqueueNDRangeKernel( + context->queue, kernel, + 3, NULL, sdims, NULL, + 1, prev, event + ); + swap_buffers(in1, out); + return err; +} + +cl_int rare_biome_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_PRIMARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_int error = run_layer_kernel(KER_MAP_RARE_BIOMES, *in, *out, context, layer, dims, seed_range, prev, event); + swap_buffers(in, out); + return error; +} + +cl_int shore_runner(struct GeneratorContext* context, cl_int layer, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_mem** in = &context->buffer_layout[BUF_PRIMARY]; + cl_mem** out = &context->buffer_layout[BUF_TMP]; + cl_int error = run_biome_layer_kernel(KER_SHORE, *in, *out, context, layer, dims, seed_range, prev, 
event); + swap_buffers(in, out); + return error; +} + +/// Set up global properties of a layer. +/// + layer :: id of the layer to set up +/// + runner :: function that will execute the OpenCL kernel. Responsible for selecting the proper buffers. +/// + buffer_id :: id of the buffer where the result will be available. +/// + size_alter :: function to determine the required size of the parent layer, given the current layer. +/// + work_dimentions :: function to determine the dimensions of the kernel's global work size. +static inline void init_layer(int layer, kernel_runner runner, int buffer_id, size_alteration size_alter, size_alteration work_dimentions) { + PARENT_SIZES[layer] = size_alter; + KERNEL_RUNNERS[layer] = runner; + LAYER_BUFFER[layer] = buffer_id; + LAYER_WORK_DIMENTIONS[layer] = work_dimentions; +} + +void init_layer_decendants() { + init_layer(L_ISLAND_4096, island_4096_runner, 0, layer_size_identity, layer_size_identity); + init_layer(L_ZOOM_2048, zoom_island_2048_runner, 0, zooming_layer, zooming_layer); + init_layer(L_ADD_ISLAND_2048, add_island_runner, 0, add_brim_layer, layer_size_identity); + init_layer(L_ZOOM_1024, zoom_runner, 0, zooming_layer, zooming_layer); + init_layer(L_ADD_ISLAND_1024A, add_island_runner, 0, add_brim_layer, layer_size_identity); + init_layer(L_ADD_ISLAND_1024B, add_island_runner, 0, add_brim_layer, layer_size_identity); + init_layer(L_ADD_ISLAND_1024C, add_island_runner, 0, add_brim_layer, layer_size_identity); + init_layer(L_REMOVE_OCEAN_1024, remove_ocean_runner, 0, add_brim_layer, layer_size_identity); + init_layer(L_ADD_SNOW_1024, add_snow_runner, 0, add_brim_layer, layer_size_identity); + init_layer(L_ADD_ISLAND_1024D, add_island_runner, 0, add_brim_layer, layer_size_identity); + init_layer(L_COOL_WARM_1024, cool_warm_runner, 0, add_brim_layer, layer_size_identity); + init_layer(L_HEAT_ICE_1024, heat_ice_runner, 0, add_brim_layer, layer_size_identity); + init_layer(L_SPECIAL_1024, special_runner, 0, layer_size_identity, layer_size_identity); + 
init_layer(L_ZOOM_512, zoom_runner, 0, zooming_layer, zooming_layer); + init_layer(L_ZOOM_256, zoom_runner, 0, zooming_layer, zooming_layer); + init_layer(L_ADD_ISLAND_256, add_island_runner, 0, add_brim_layer, layer_size_identity); + init_layer(L_ADD_MUSHROOM_256, add_mashroom_runner, 0, add_brim_layer, layer_size_identity); + init_layer(L_DEEP_OCEAN_256, deep_ocean_runner, 1, add_brim_layer, layer_size_identity); + init_layer(L_BIOME_256, biomes_runner, 0, layer_size_identity, layer_size_identity); + init_layer(L14_BAMBOO_256, add_bamboo_runner, 0, layer_size_identity, layer_size_identity); + init_layer(L_ZOOM_128, zoom_runner, 0, zooming_layer, zooming_layer); + init_layer(L_ZOOM_64, zoom_runner, 0, zooming_layer, zooming_layer); + init_layer(L_BIOME_EDGE_64, biome_edge_runner, 0, add_brim_layer, layer_size_identity); + init_layer(L_RIVER_INIT_256, river_init_runner, 1, layer_size_identity, layer_size_identity); + init_layer(L_ZOOM_128_HILLS, zoom_snd_runner, 1, zooming_layer, zooming_layer); + init_layer(L_ZOOM_64_HILLS, zoom_64_hills_runner, 1, zooming_brim_layer, layer_size_identity); + init_layer(L_HILLS_64, hills_runner, 0, add_brim_layer, layer_size_identity); + init_layer(L_RARE_BIOME_64, rare_biome_runner, 0, layer_size_identity, layer_size_identity); + init_layer(L_ZOOM_32, zoom_runner, 0, zooming_layer, zooming_layer); + init_layer(L_ADD_ISLAND_32, add_island_runner, 0, add_brim_layer, layer_size_identity); + init_layer(L_ZOOM_16, zoom_runner, 0, zooming_layer, zooming_layer); + init_layer(L_SHORE_16, shore_runner, 0, add_brim_layer, layer_size_identity); +} + +static const cl_int ZERO = 0; +static inline cl_int fill_primary_buffer(struct GeneratorContext* context, cl_int4 dims, size_t seed_range, const cl_event* prev, cl_event* event) { + cl_int event_count = prev ? 
1 : 0; + return clEnqueueFillBuffer(context->queue, context->buffers[0], &ZERO, sizeof(cl_int), 0, seed_range*dims.s2*dims.s3*sizeof(cl_int), event_count, prev, event); +} + +int create_build_plan(struct GeneratorContext* context, int layer, int* plan, cl_int4* sizes, cl_int4 dims); +int create_hills_build_plan(struct GeneratorContext* context, int* plan, cl_int4* sizes, cl_int4 dims) { + // river branch + cl_int4 rdims = dims; + const int RIVER_LAYERS[] = {L_ZOOM_64_HILLS, L_ZOOM_128_HILLS, L_RIVER_INIT_256}; + for (int i=0; i<3; ++i) { + plan--; + sizes--; + *plan = RIVER_LAYERS[i]; + *sizes = rdims; + rdims = PARENT_SIZES[RIVER_LAYERS[i]](rdims); + } + // main branch + return 4 + create_build_plan(context, L_BIOME_EDGE_64, plan, sizes, dims); +} + +void show_build_plan(int* layer, cl_int4* sizes, int len) { + for(int i=0; i= 0; layer--, plan--, sizes--) { + *plan = layer; + *sizes = dims; + dims = PARENT_SIZES[layer](dims); + } + return len; + } else { + int parent; + switch (layer) { + case L_ZOOM_128: parent = L14_BAMBOO_256; break; + case L14_BAMBOO_256: parent = L_BIOME_256; break; + case L_ZOOM_64: parent = L_ZOOM_128; break; + case L_BIOME_EDGE_64: parent = L_ZOOM_64; break; + case L_RIVER_INIT_256: parent = L_DEEP_OCEAN_256; break; + case L_ZOOM_128_HILLS: parent = L_RIVER_INIT_256; break; + case L_ZOOM_64_HILLS: parent = L_ZOOM_128_HILLS; break; + case L_HILLS_64: + return create_hills_build_plan(context, plan, sizes, PARENT_SIZES[layer](dims)); + case L_RARE_BIOME_64: parent = L_HILLS_64; break; + case L_ZOOM_32: parent = L_RARE_BIOME_64; break; + case L_ADD_ISLAND_32: parent = L_ZOOM_32; break; + case L_ZOOM_16: parent = L_ADD_ISLAND_32; break; + case L_SHORE_16: parent = L_ZOOM_16; break; + default: + printf("Error: unable to create build plan. 
Invalid layer id %d\n", layer); + return 0; + } + return 1 + create_build_plan(context, parent, plan, sizes, PARENT_SIZES[layer](dims)); + } +} + +cl_int generate_layer(struct GeneratorContext* context, int layer, cl_int4 dims, size_t seed_range, cl_int* target, const cl_event* prev, cl_event* event) { + int build_plan[L_NUM]; + cl_int4 sz[L_NUM]; + cl_event events[L_NUM + 1]; + cl_int err; + + int* plan_end = build_plan + L_NUM; + int plan_len = create_build_plan(context, layer, build_plan + L_NUM, sz + L_NUM, dims); + + int* layers = build_plan + L_NUM - plan_len; + cl_int4* sizes = sz + L_NUM - plan_len; + + err = fill_primary_buffer(context, dims, seed_range, prev, &events[0]); + if (err < 0) return err; + + for (int i = 0; i < plan_len; ++i) { + int curr_layer = layers[i]; + err = KERNEL_RUNNERS[curr_layer](context, (cl_int)curr_layer, sizes[i], seed_range, &events[i], &events[i + 1]); + if (err < 0) return err; + } + cl_mem* buffer = context->buffer_layout[LAYER_BUFFER[layer]]; + return clEnqueueReadBuffer( + context->queue, *buffer, CL_FALSE, + 0, sizeof(cl_int) * dims.s2 * dims.s3 * seed_range, target, + 1, &events[plan_len], event + ); +} + +cl_int init_generator_context(struct GeneratorContext* context, int version, size_t width, size_t height, size_t seed_range) { + init_layer_decendants(); + context->seed_range = seed_range; + cl_int err; + cl_platform_id platform; + int new_width = width + 2 * L_NUM; // Most commonly, parent layers add 1 cell brim around child area. 
+ int new_height = height + 2 * L_NUM; + if ((err = clGetPlatformIDs(1, &platform, NULL)) < 0) { + return err; + } + cl_device_id device; + if ((err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL)) < 0) { + return err; + } + context->context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); + if (err < 0) { + return err; + } + context->queue = clCreateCommandQueueWithProperties(context->context, device, (cl_queue_properties*) NULL, &err); + if (err < 0) { + clReleaseContext(context->context); + return err; + } + // This buffer holds info about layer seeds. Context will be initialized by `set_layer_seed` call. + context->layersBuffer = clCreateBuffer(context->context, CL_MEM_READ_ONLY, sizeof(struct CLLayer) * L_NUM * seed_range, NULL, &err); + if (err < 0) { + clReleaseContext(context->context); + return err; + } + // This buffer holds info about biomes. + context->biomesBuffer = clCreateBuffer(context->context, CL_MEM_READ_ONLY, sizeof(Biome) * 256, NULL, &err); + if (err < 0) { + clReleaseContext(context->context); + return err; + } + initBiomes(); + clEnqueueWriteBuffer(context->queue, context->biomesBuffer, CL_TRUE, 0, sizeof(Biome) * 256, biomes, 0, NULL, NULL); + // Init buffers for layer data. + for (int i = 0; i < BUFFER_COUNT; ++i) { + context->buffers[i] = clCreateBuffer(context->context, CL_MEM_READ_WRITE, seed_range * new_width * new_height * sizeof(cl_int), NULL, &err); + if (err < 0) { + clReleaseContext(context->context); + return err; + } + context->buffer_layout[i] = &context->buffers[i]; + } + // Load program. + FILE* program = fopen("ocl_kernels.cl", "r"); + if (!program) { + clReleaseContext(context->context); + return -1; + } + // 256 KiB should be enough? + // #TODO: handle file loading properly. 
+ const int SIZE = 1024 * 256; + char* buffer = (char*) malloc(SIZE); + if (!buffer) { + fclose(program); + clReleaseContext(context->context); + return -1; + } + size_t length = fread(buffer, 1, SIZE, program); // fread returns a byte count and does not NUL-terminate the buffer + if (length == 0) { + fclose(program); + free(buffer); + clReleaseContext(context->context); + return -1; + } + fclose(program); + context->program = clCreateProgramWithSource(context->context, 1, (const char**)&buffer, &length, &err); // pass the explicit length: NULL would require a NUL-terminated string + free(buffer); + if (err < 0) { + clReleaseContext(context->context); + return err; + } + err = clBuildProgram(context->program, 1, &device, NULL, NULL, NULL); + if (err < 0) { + size_t size = 256*1024*sizeof(char); + char *buildlog = malloc(size); + int err2 = clGetProgramBuildInfo(context->program, device, CL_PROGRAM_BUILD_LOG, size, buildlog, NULL); + if (err2 < 0) { + printf("While handling failed build: error %d\n", err2); + } + printf("%s\n", buildlog); + clReleaseContext(context->context); + return err; + } + // Select available kernels. Order must conform to `Kernels` enumeration. 
+ const char* kernel_names[KERNEL_COUNT] = { + "mapIsland", + "mapZoomIsland", + "mapAddIsland", + "mapZoom", + "mapRemoveTooMuchOcean", + "mapAddSnow", + "mapCoolWarm", + "mapHeatIce", + "mapSpecial", + "mapAddMushroomIsland", + "mapDeepOcean", + "mapBiomes", + "mapAddBamboo", + "mapBiomeEdge", + "mapRiverInit", + "mapHills13", + "removeBrim", + "mapRareBiome", + "mapShore", + + "setSeed" + }; + for (int i=0; i<KERNEL_COUNT; ++i) { + context->kernels[i] = clCreateKernel(context->program, kernel_names[i], &err); + if (err < 0) { + clReleaseContext(context->context); + return err; + } + } + + return CL_SUCCESS; +} + +void release_generator_context(struct GeneratorContext* context) { + clReleaseContext(context->context); +} + +/// Initialize world seeds in range of (seed..seed+seed_range) +cl_int set_world_seed(struct GeneratorContext* context, int64_t seed, cl_event* event) { + cl_kernel kernel = context->kernels[SET_SEED]; + cl_int err = clSetKernelArg(kernel, 0, sizeof(int64_t), &seed); + if (err < 0) return err; + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &context->layersBuffer); + if (err < 0) return err; + return clEnqueueNDRangeKernel( + context->queue, kernel, 1, + NULL, &context->seed_range, NULL, + 0, NULL, event + ); +} \ No newline at end of file diff --git a/ocl_generator.h b/ocl_generator.h new file mode 100644 index 00000000..18c558ca --- /dev/null +++ b/ocl_generator.h @@ -0,0 +1,70 @@ +#define CL_TARGET_OPENCL_VERSION 220 +#include <CL/cl.h> +#include "generator.h" + +enum Buffers { + BUF_PRIMARY = 0, + BUF_SECONDARY, + BUF_TMP, + + BUFFER_COUNT +}; + +enum Kernels { + KER_MAP_ISLAND, + KER_MAP_ZOOM_ISLAND, + KER_MAP_ADD_ISLAND, + KER_MAP_ZOOM, + KER_REMOVE_OCEAN, + KER_ADD_SNOW, + KER_COOL_WARM, + KER_HEAT_ICE, + KER_SPECIAL, + KER_ADD_MUSHROOM, + KER_DEEP_OCEAN, + KER_BIOMES, + KER_ADD_BAMBOO, + KER_BIOME_EDGE, + KER_RIVER_INIT, + KER_HILLS, + KER_RM_BRIM, + KER_MAP_RARE_BIOMES, + KER_SHORE, + + SET_SEED, + + KERNEL_COUNT +}; + +struct CLLayer { + int64_t startSeed; + int64_t 
startSalt; +}; + +struct CLLayerStack { + struct CLLayer layers[L_NUM]; +}; + +struct GeneratorContext { + cl_context context; + cl_command_queue queue; + cl_mem biomesBuffer; + cl_mem layersBuffer; + cl_mem buffers[BUFFER_COUNT]; + cl_mem* buffer_layout[BUFFER_COUNT]; + cl_program program; + size_t seed_range; + int version; + + cl_kernel kernels[KERNEL_COUNT]; + + struct CLLayerStack stack; +}; + +cl_int init_generator_context(struct GeneratorContext* context, int version, size_t width, size_t height, size_t seed_range); +cl_int set_world_seed(struct GeneratorContext* context, int64_t seed, cl_event* event); +/// Before buffer `target` can be used, user must wait for queue to finish, by +/// clWaitForEvent for event. +cl_int generate_layer(struct GeneratorContext* context, int layer, cl_int4 dims, size_t seed_range, cl_int* target, const cl_event* prev, cl_event* event); + +void release_generator_context(struct GeneratorContext* context); \ No newline at end of file diff --git a/ocl_kernels.cl b/ocl_kernels.cl new file mode 100644 index 00000000..b5a3efbd --- /dev/null +++ b/ocl_kernels.cl @@ -0,0 +1,1323 @@ +typedef long int64_t; +typedef unsigned long uint64_t; + +enum { + L_ISLAND_4096 = 0, + L_ZOOM_2048, + L_ADD_ISLAND_2048, + L_ZOOM_1024, + L_ADD_ISLAND_1024A, + L_ADD_ISLAND_1024B, + L_ADD_ISLAND_1024C, + L_REMOVE_OCEAN_1024, + L_ADD_SNOW_1024, + L_ADD_ISLAND_1024D, + L_COOL_WARM_1024, + L_HEAT_ICE_1024, + L_SPECIAL_1024, /* Good entry for: temperature categories */ + L_ZOOM_512, + L_ZOOM_256, + L_ADD_ISLAND_256, + L_ADD_MUSHROOM_256, /* Good entry for: mushroom biomes */ + L_DEEP_OCEAN_256, + L_BIOME_256, /* Good entry for: major biome types */ + L_ZOOM_128, + L_ZOOM_64, + L_BIOME_EDGE_64, + L_RIVER_INIT_256, + L_ZOOM_128_HILLS, + L_ZOOM_64_HILLS, + L_HILLS_64, /* Good entry for: minor biome types */ + L_RARE_BIOME_64, + L_ZOOM_32, + L_ADD_ISLAND_32, + L_ZOOM_16, + L_SHORE_16, + L_ZOOM_8, + L_ZOOM_4, + L_SMOOTH_4, + L_ZOOM_128_RIVER, + L_ZOOM_64_RIVER, + 
L_ZOOM_32_RIVER, + L_ZOOM_16_RIVER, + L_ZOOM_8_RIVER, + L_ZOOM_4_RIVER, + L_RIVER_4, + L_SMOOTH_4_RIVER, + L_RIVER_MIX_4, + L_VORONOI_ZOOM_1, + + // 1.13 layers + L13_OCEAN_TEMP_256, + L13_ZOOM_128, + L13_ZOOM_64, + L13_ZOOM_32, + L13_ZOOM_16, + L13_ZOOM_8, + L13_ZOOM_4, + L13_OCEAN_MIX_4, + + // 1.14 layers + L14_BAMBOO_256, + + // largeBiomes layers + L_ZOOM_LARGE_BIOME_A, + L_ZOOM_LARGE_BIOME_B, + + L_NUM +}; + +struct Biome { + int id; + int type; + double height; + double temp; + int tempCat; + int mutated; +}; + +enum BiomeID +{ + none = -1, + // 0 + ocean = 0, + plains, + desert, + mountains, extremeHills = mountains, + forest, + taiga, + swamp, swampland = swamp, + river, + nether_wastes, hell = nether_wastes, + the_end, sky = the_end, + // 10 + frozen_ocean, frozenOcean = frozen_ocean, + frozen_river, frozenRiver = frozen_river, + snowy_tundra, icePlains = snowy_tundra, + snowy_mountains, iceMountains = snowy_mountains, + mushroom_fields, mushroomIsland = mushroom_fields, + mushroom_field_shore, mushroomIslandShore = mushroom_field_shore, + beach, + desert_hills, desertHills = desert_hills, + wooded_hills, forestHills = wooded_hills, + taiga_hills, taigaHills = taiga_hills, + // 20 + mountain_edge, extremeHillsEdge = mountain_edge, + jungle, + jungle_hills, jungleHills = jungle_hills, + jungle_edge, jungleEdge = jungle_edge, + deep_ocean, deepOcean = deep_ocean, + stone_shore, stoneBeach = stone_shore, + snowy_beach, coldBeach = snowy_beach, + birch_forest, birchForest = birch_forest, + birch_forest_hills, birchForestHills = birch_forest_hills, + dark_forest, roofedForest = dark_forest, + // 30 + snowy_taiga, coldTaiga = snowy_taiga, + snowy_taiga_hills, coldTaigaHills = snowy_taiga_hills, + giant_tree_taiga, megaTaiga = giant_tree_taiga, + giant_tree_taiga_hills, megaTaigaHills = giant_tree_taiga_hills, + wooded_mountains, extremeHillsPlus = wooded_mountains, + savanna, + savanna_plateau, savannaPlateau = savanna_plateau, + badlands, mesa = badlands, + 
wooded_badlands_plateau, mesaPlateau_F = wooded_badlands_plateau, + badlands_plateau, mesaPlateau = badlands_plateau, + // 40 -- 1.13 + small_end_islands, + end_midlands, + end_highlands, + end_barrens, + warm_ocean, warmOcean = warm_ocean, + lukewarm_ocean, lukewarmOcean = lukewarm_ocean, + cold_ocean, coldOcean = cold_ocean, + deep_warm_ocean, warmDeepOcean = deep_warm_ocean, + deep_lukewarm_ocean, lukewarmDeepOcean = deep_lukewarm_ocean, + deep_cold_ocean, coldDeepOcean = deep_cold_ocean, + // 50 + deep_frozen_ocean, frozenDeepOcean = deep_frozen_ocean, + BIOME_NUM, + + the_void = 127, + + // mutated variants + sunflower_plains = plains+128, + desert_lakes = desert+128, + gravelly_mountains = mountains+128, + flower_forest = forest+128, + taiga_mountains = taiga+128, + swamp_hills = swamp+128, + ice_spikes = snowy_tundra+128, + modified_jungle = jungle+128, + modified_jungle_edge = jungle_edge+128, + tall_birch_forest = birch_forest+128, + tall_birch_hills = birch_forest_hills+128, + dark_forest_hills = dark_forest+128, + snowy_taiga_mountains = snowy_taiga+128, + giant_spruce_taiga = giant_tree_taiga+128, + giant_spruce_taiga_hills = giant_tree_taiga_hills+128, + modified_gravelly_mountains = wooded_mountains+128, + shattered_savanna = savanna+128, + shattered_savanna_plateau = savanna_plateau+128, + eroded_badlands = badlands+128, + modified_wooded_badlands_plateau = wooded_badlands_plateau+128, + modified_badlands_plateau = badlands_plateau+128, + // 1.14 + bamboo_jungle = 168, + bamboo_jungle_hills = 169, + // 1.16 + soul_sand_valley = 170, + crimson_forest = 171, + warped_forest = 172, + basalt_deltas = 173, +}; + +enum BiomeType +{ + Void = -1, + Ocean, Plains, Desert, Hills, Forest, Taiga, Swamp, River, Nether, Sky, Snow, MushroomIsland, Beach, Jungle, StoneBeach, Savanna, Mesa, + BTYPE_NUM +}; + +enum BiomeTempCategory +{ + Oceanic, Warm, Lush, Cold, Freezing, Special +}; + + +struct Layer { + int64_t startSeed; + int64_t startSalt; +}; + +static inline 
int mcFirstIsZero(int64_t s, int mod) { + return (int)((s >> 24) % mod) == 0; +} + +static inline int64_t mcStepSeed(int64_t s, int64_t salt) { + return s * (s * 6364136223846793005LL + 1442695040888963407LL) + salt; +} + +static inline int mcFirstInt(int64_t s, int mod) +{ + int ret = (int)((s >> 24) % mod); + if (ret < 0) + ret += mod; + return ret; +} + +static inline int64_t getChunkSeed(int64_t ss, int x, int z) { + int64_t cs = ss + x; + cs = mcStepSeed(cs, z); + cs = mcStepSeed(cs, x); + cs = mcStepSeed(cs, z); + return cs; +} + +static inline int isShallowOcean(int id) +{ + const uint64_t shallow_bits = + (1ULL << ocean) | + (1ULL << frozen_ocean) | + (1ULL << warm_ocean) | + (1ULL << lukewarm_ocean) | + (1ULL << cold_ocean); + return id < 64 && ((1ULL << id) & shallow_bits); +} + +static inline int isDeepOcean(int id) +{ + const uint64_t deep_bits = + (1ULL << deep_ocean) | + (1ULL << deep_warm_ocean) | + (1ULL << deep_lukewarm_ocean) | + (1ULL << deep_cold_ocean) | + (1ULL << deep_frozen_ocean); + return id < 64 && ((1ULL << id) & deep_bits); +} + +static inline int getBiomeType(__constant struct Biome* biomes, int id) +{ + return (id & (~0xff)) ? 
Void : biomes[id].type; +} + +static inline int biomeExists(__constant struct Biome* biomes, int id) +{ + return !(id & (~0xff)) && !(biomes[id].id & (~0xff)); +} + +static inline int areSimilar113(__constant struct Biome* biomes, int id1, int id2) +{ + if (id1 == id2) return 1; + if (id1 == wooded_badlands_plateau || id1 == badlands_plateau) + return id2 == wooded_badlands_plateau || id2 == badlands_plateau; + if (!biomeExists(biomes, id1) || !biomeExists(biomes, id2)) return 0; + return getBiomeType(biomes, id1) == getBiomeType(biomes, id2); +} + +static inline int replaceEdge(__constant struct Biome* biomes, __global int *out, int idx, int v10, int v21, int v01, int v12, int id, int baseID, int edgeID) { + if (id != baseID) return 0; + + // areSimilar() has not changed behaviour for ids < 128, so use the faster variant + if (areSimilar113(biomes, v10, baseID) && areSimilar113(biomes, v21, baseID) && + areSimilar113(biomes, v01, baseID) && areSimilar113(biomes, v12, baseID)) + out[idx] = id; + else + out[idx] = edgeID; + + return 1; +} + +static inline int isOceanic(int id) +{ + const uint64_t ocean_bits = + (1ULL << ocean) | + (1ULL << frozen_ocean) | + (1ULL << warm_ocean) | + (1ULL << lukewarm_ocean) | + (1ULL << cold_ocean) | + (1ULL << deep_ocean) | + (1ULL << deep_warm_ocean) | + (1ULL << deep_lukewarm_ocean) | + (1ULL << deep_cold_ocean) | + (1ULL << deep_frozen_ocean); + return id < 64 && ((1ULL << id) & ocean_bits); +} + +static inline int64_t getLayerSeed(int64_t salt) +{ + int64_t ls = mcStepSeed(salt, salt); + ls = mcStepSeed(ls, salt); + ls = mcStepSeed(ls, salt); + return ls; +} + +void set_layer_seed(__global struct Layer* layer, int64_t salt, int64_t worldSeed) { + salt = getLayerSeed(salt); + int64_t st = worldSeed; + st = mcStepSeed(st, salt); + st = mcStepSeed(st, salt); + st = mcStepSeed(st, salt); + + layer->startSalt = st; + layer->startSeed = mcStepSeed(st, 0); +} + +__kernel void setSeed(int64_t seed_start, __global struct Layer* layers) { 
+ int offset = get_global_id(0) * L_NUM; + int seed = seed_start + get_global_id(0); + set_layer_seed(&layers[offset + L_ISLAND_4096], 1, seed); + set_layer_seed(&layers[offset + L_ZOOM_2048], 2000, seed); + set_layer_seed(&layers[offset + L_ADD_ISLAND_2048], 1, seed); + set_layer_seed(&layers[offset + L_ZOOM_1024], 2001, seed); + set_layer_seed(&layers[offset + L_ADD_ISLAND_1024A], 2, seed); + set_layer_seed(&layers[offset + L_ADD_ISLAND_1024B], 50, seed); + set_layer_seed(&layers[offset + L_ADD_ISLAND_1024C], 70, seed); + set_layer_seed(&layers[offset + L_REMOVE_OCEAN_1024], 2, seed); + set_layer_seed(&layers[offset + L_ADD_SNOW_1024], 2, seed); + set_layer_seed(&layers[offset + L_ADD_ISLAND_1024D], 3, seed); + set_layer_seed(&layers[offset + L_COOL_WARM_1024], 2, seed); + set_layer_seed(&layers[offset + L_HEAT_ICE_1024], 2, seed); + set_layer_seed(&layers[offset + L_SPECIAL_1024], 3, seed); + set_layer_seed(&layers[offset + L_ZOOM_512], 2002, seed); + set_layer_seed(&layers[offset + L_ZOOM_256], 2003, seed); + set_layer_seed(&layers[offset + L_ADD_ISLAND_256], 4, seed); + set_layer_seed(&layers[offset + L_ADD_MUSHROOM_256], 5, seed); + set_layer_seed(&layers[offset + L_DEEP_OCEAN_256], 4, seed); + set_layer_seed(&layers[offset + L_BIOME_256], 200, seed); + set_layer_seed(&layers[offset + L14_BAMBOO_256], 1001, seed); + set_layer_seed(&layers[offset + L_ZOOM_128], 1000, seed); + set_layer_seed(&layers[offset + L_ZOOM_64], 1001, seed); + set_layer_seed(&layers[offset + L_BIOME_EDGE_64], 1000, seed); + set_layer_seed(&layers[offset + L_RIVER_INIT_256], 100, seed); + set_layer_seed(&layers[offset + L_ZOOM_128_HILLS], 1000, seed); + set_layer_seed(&layers[offset + L_ZOOM_64_HILLS], 1001, seed); + set_layer_seed(&layers[offset + L_HILLS_64], 1000, seed); + set_layer_seed(&layers[offset + L_RARE_BIOME_64], 1001, seed); + set_layer_seed(&layers[offset + L_ZOOM_32], 1000, seed); + set_layer_seed(&layers[offset + L_ADD_ISLAND_32], 3, seed); + set_layer_seed(&layers[offset 
+ L_ZOOM_16], 1001, seed); + set_layer_seed(&layers[offset + L_SHORE_16], 1000, seed); +} + +__kernel void mapIsland(__constant struct Layer* layer, int layer_id, int4 dims, __constant int* in, __global int* out) { + int seed_offset = layer_id + (int)get_global_id(2) * L_NUM; + + int i = (int)get_global_id(0); + int j = (int)get_global_id(1); + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + int64_t ss = layer[seed_offset].startSeed; + // Frame in seed range + in = in + w * h * get_global_id(2); + out = out + w * h * get_global_id(2); + + int64_t cs = getChunkSeed(ss, x + i, z + j); + out[i + j*w] = mcFirstIsZero(cs, 10); + + if (x > -w && x <= 0 && z > -h && z <= 0) + { + out[-x + -z * w] = 1; + } +} + +__kernel void mapZoomIsland(__constant struct Layer* layer, int layer_id, int4 dims, __constant int* in, __global int* out) { + int seed_offset = layer_id + (int)get_global_id(2) * L_NUM; + int i = (int)get_global_id(0); + int j = (int)get_global_id(1); + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + int st = (int)layer[seed_offset].startSalt; + int ss = (int)layer[seed_offset].startSeed; + + int pX = x >> 1; + int pZ = z >> 1; + int pW = ((x + w) >> 1) - pX + 1; + int pH = ((z + h) >> 1) - pZ + 1; + + // Frame in seed range + in = in + pW * pH * get_global_id(2); + out = out + w * h * get_global_id(2); + + int newW = pW << 1; + int newH = pH << 1; + + int v00 = in[(j+0)*pW + i]; + int v01 = in[(j+1)*pW + i]; + int v10 = in[(j+0)*pW + i+1]; + int v11 = in[(j+1)*pW + i+1]; + + int chunkX = (i + pX) << 1; + int chunkZ = (j + pZ) << 1; + + int cs = ss; + cs += chunkX; + cs *= cs * 1284865837 + 4150755663; + cs += chunkZ; + cs *= cs * 1284865837 + 4150755663; + cs += chunkX; + cs *= cs * 1284865837 + 4150755663; + cs += chunkZ; + + int zz = 2 * j - (z & 1); + int xx = 2 * i - (x & 1); + + bool z0 = zz >= 0 && zz < h; + bool z1 = zz + 1 < h; + bool x0 = xx >= 0 && xx < w; + bool x1 = xx + 1 < w; + + if (z0 && x0) 
{ + out[xx + zz * w] = v00; + } + if (z1 && x0) { + out[xx + (zz + 1) * w] = (cs >> 24) & 1 ? v01 : v00; + } + + cs *= cs * 1284865837 + 4150755663; + cs += st; + + if (z0 && x1) { + out[xx + 1 + zz * w] = (cs >> 24) & 1 ? v10 : v00; + } + + cs *= cs * 1284865837 + 4150755663; + cs += st; + int r = (cs >> 24) & 3; + + if (z1 && x1) { + out[xx + 1 + (zz + 1) * w] = r==0 ? v00 : r==1 ? v10 : r==2 ? v01 : v11; + } +} + +__kernel void mapAddIsland(__constant struct Layer* layer, int layer_id, int4 dims, __constant int* in, __global int* out) { + int seed_offset = layer_id + (int)get_global_id(2) * L_NUM; + + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + + int pX = dims.s0 - 1; + int pZ = dims.s1 - 1; + int pW = dims.s2 + 2; + int pH = dims.s3 + 2; + + int64_t st = layer[seed_offset].startSalt; + int64_t ss = layer[seed_offset].startSeed; + + // Frame in seed range + in = in + pW * pH * get_global_id(2); + out = out + w * h * get_global_id(2); + + + int i = (int)get_global_id(0); + int j = (int)get_global_id(1); + + int v00 = in[i + j * pW]; + int v02 = in[i + (j + 2) * pW]; + int v20 = in[(i + 2) + j * pW]; + int v22 = in[(i + 2) + (j + 2) * pW]; + int v11 = in[(i + 1) + (j + 1) * pW]; + + int v = v11; + + switch (v11) { + case 0: + if (v00 != 0 || v20 != 0 || v02 != 0 || v22 != 0) { + int64_t cs = getChunkSeed(ss, i+x, j+z); + int inc = 0; + v = 1; + if (v00 != 0) { + ++inc; v = v00; + cs = mcStepSeed(cs, st); + } + if (v20 != 0) { + if (++inc == 1 || mcFirstIsZero(cs, 2)) v = v20; + cs = mcStepSeed(cs, st); + } + if (v02 != 0) { + switch (++inc) { + case 1: v = v02; break; + case 2: if (mcFirstIsZero(cs, 2)) v = v02; break; + default: if (mcFirstIsZero(cs, 3)) v = v02; + } + cs = mcStepSeed(cs, st); + } + if (v22 != 0) { + switch (++inc) { + case 1: v = v22; break; + case 2: if (mcFirstIsZero(cs, 2)) v = v22; break; + case 3: if (mcFirstIsZero(cs, 3)) v = v22; break; + default: if (mcFirstIsZero(cs, 4)) v = v22; + } + cs = mcStepSeed(cs, 
st); + } + + if (v != 4 && !mcFirstIsZero(cs, 3)) { + v = 0; + } + } + break; + case 4: + break; + default: + if (v00 == 0 || v20 == 0 || v02 == 0 || v22 == 0) { + int64_t cs = getChunkSeed(ss, i+x, j+z); + if (mcFirstIsZero(cs, 5)) { + v = 0; + } + } + } + + out[i + j*w] = v; +} + +__kernel void removeBrim(int4 dims, __constant int* in, __global int* out) { + int w = dims.s2; + int h = dims.s3; + int pW = w + 2; + int pH = h + 2; + + // Frame in seed range + in = in + pW * pH * get_global_id(2); + out = out + w * h * get_global_id(2); + + int i = (int)get_global_id(0); + int j = (int)get_global_id(1); + out[i + j*w] = in[(i+1) + (j+1)*pW]; +} + +__kernel void mapZoom(__constant struct Layer* layer, int layer_id, int4 dims, __constant int* in, __global int* out) { + int seed_offset = layer_id + (int)get_global_id(2) * L_NUM; + int i = (int)get_global_id(0); + int j = (int)get_global_id(1); + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + int st = (int)layer[seed_offset].startSalt; + int ss = (int)layer[seed_offset].startSeed; + + int pX = x >> 1; + int pZ = z >> 1; + int pW = ((x + w) >> 1) - pX + 1; + int pH = ((z + h) >> 1) - pZ + 1; + + // Frame in seed range + in = in + pW * pH * get_global_id(2); + out = out + w * h * get_global_id(2); + + int newW = pW << 1; + int newH = pH << 1; + + int v00 = in[(j+0)*pW + i]; + int v01 = in[(j+1)*pW + i]; + int v10 = in[(j+0)*pW + i+1]; + int v11 = in[(j+1)*pW + i+1]; + + int chunkX = (i + pX) << 1; + int chunkZ = (j + pZ) << 1; + + int cs = ss; + cs += chunkX; + cs *= cs * 1284865837 + 4150755663; + cs += chunkZ; + cs *= cs * 1284865837 + 4150755663; + cs += chunkX; + cs *= cs * 1284865837 + 4150755663; + cs += chunkZ; + + int zz = 2 * j - (z & 1); + int xx = 2 * i - (x & 1); + + bool z0 = zz >= 0 && zz < h; + bool z1 = zz + 1 < h; + bool x0 = xx >= 0 && xx < w; + bool x1 = xx + 1 < w; + + if (z0 && x0) { + out[xx + zz * w] = v00; + } + if (z1 && x0) { + out[xx + (zz + 1) * w] = (cs >> 24) & 1 
? v01 : v00; + } + cs *= cs * 1284865837 + 4150755663; + cs += st; + if (z0 && x1) { + out[xx + 1 + zz * w] = (cs >> 24) & 1 ? v10 : v00; + } + int v; + if (v10 == v01 && v01 == v11) v = v10; + else if (v00 == v10 && v00 == v01) v = v00; + else if (v00 == v10 && v00 == v11) v = v00; + else if (v00 == v01 && v00 == v11) v = v00; + else if (v00 == v10 && v01 != v11) v = v00; + else if (v00 == v01 && v10 != v11) v = v00; + else if (v00 == v11 && v10 != v01) v = v00; + else if (v10 == v01 && v00 != v11) v = v10; + else if (v10 == v11 && v00 != v01) v = v10; + else if (v01 == v11 && v00 != v10) v = v01; + else + { + cs *= cs * 1284865837 + 4150755663; + cs += st; + int r = (cs >> 24) & 3; + v = r==0 ? v00 : r==1 ? v10 : r==2 ? v01 : v11; + } + if (z1 && x1) { + out[xx + 1 + (zz + 1) * w] = v; + } +} + +__kernel void mapRemoveTooMuchOcean(__constant struct Layer* layers, int layer_id, int4 dims, __constant int* in, __global int* out) { + int seed_offset = layer_id + (int)get_global_id(2) * L_NUM; + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + + int pX = x - 1; + int pZ = z - 1; + int pW = w + 2; + int pH = h + 2; + + // Frame in seed range + in = in + pW * pH * get_global_id(2); + out = out + w * h * get_global_id(2); + + int i = get_global_id(0); + int j = get_global_id(1); + + int v10 = in[i+1 + (j+0)*pW]; + int v01 = in[i+0 + (j+1)*pW]; + int v11 = in[i+1 + (j+1)*pW]; + int v21 = in[i+2 + (j+1)*pW]; + int v12 = in[i+1 + (j+2)*pW]; + + int v = v11; + if (v10 == 0 && v01 == 0 && v11 == 0 && v21 == 0 && v12 == 0) { + int64_t ss = layers[seed_offset].startSeed; + int64_t cs = getChunkSeed(ss, i+x, j+z); + if (mcFirstIsZero(cs, 2)) { + v = 1; + } + } + out[i + j*w] = v; +} + +__kernel void mapAddSnow(__constant struct Layer* layers, int layer_id, int4 dims, __constant int* in, __global int* out) { + int seed_offset = layer_id + (int)get_global_id(2) * L_NUM; + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + + int 
pX = x - 1; + int pZ = z - 1; + int pW = w + 2; + int pH = h + 2; + + // Frame in seed range + in = in + pW * pH * get_global_id(2); + out = out + w * h * get_global_id(2); + + int i = get_global_id(0); + int j = get_global_id(1); + + int v11 = in[i+1 + (j+1)*pW]; + + int64_t ss = layers[seed_offset].startSeed; + int64_t cs; + + if (isShallowOcean(v11)) { + out[i + j*w] = v11; + } else { + cs = getChunkSeed(ss, i+x, j+z); + int r = mcFirstInt(cs, 6); + int v; + + if (r == 0) v = 4; + else if (r <= 1) v = 3; + else v = 1; + + out[i + j*w] = v; + } +} + +__kernel void mapCoolWarm(__constant struct Layer* layers, int layer_id, int4 dims, __constant int* in, __global int* out) { + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + + int pX = x - 1; + int pZ = z - 1; + int pW = w + 2; + int pH = h + 2; + + // Frame in seed range + in = in + pW * pH * get_global_id(2); + out = out + w * h * get_global_id(2); + + int i = get_global_id(0); + int j = get_global_id(1); + + int v11 = in[i+1 + (j+1)*pW]; + + if (v11 == 1) + { + int v10 = in[i+1 + (j+0)*pW]; + int v21 = in[i+2 + (j+1)*pW]; + int v01 = in[i+0 + (j+1)*pW]; + int v12 = in[i+1 + (j+2)*pW]; + + if (v10 == 3 || v10 == 4 || v21 == 3 || v21 == 4 || v01 == 3 || v01 == 4 || v12 == 3 || v12 == 4) + { + v11 = 2; + } + } + + out[i + j*w] = v11; +} + +__kernel void mapHeatIce(__constant struct Layer* layer, int layer_id, int4 dims, __constant int* in, __global int* out) { + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + + int pX = x - 1; + int pZ = z - 1; + int pW = w + 2; + int pH = h + 2; + + // Frame in seed range + in = in + pW * pH * get_global_id(2); + out = out + w * h * get_global_id(2); + + int i = get_global_id(0); + int j = get_global_id(1); + + int v11 = in[i+1 + (j+1)*pW]; + + if (v11 == 4) { + int v10 = in[i+1 + (j+0)*pW]; + int v21 = in[i+2 + (j+1)*pW]; + int v01 = in[i+0 + (j+1)*pW]; + int v12 = in[i+1 + (j+2)*pW]; + + if (v10 == 1 || v10 == 2 || v21 == 1 
|| v21 == 2 || v01 == 1 || v01 == 2 || v12 == 1 || v12 == 2) + { + v11 = 3; + } + } + + out[i + j*w] = v11; +} + +__kernel void mapSpecial(__constant struct Layer* layers, int layer_id, int4 dims, __constant int* in, __global int* out) { + int seed_offset = layer_id + (int)get_global_id(2) * L_NUM; + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + + // Frame in seed range + in = in + w * h * get_global_id(2); + out = out + w * h * get_global_id(2); + + int i = get_global_id(0); + int j = get_global_id(1); + + int64_t st = layers[seed_offset].startSalt; + int64_t ss = layers[seed_offset].startSeed; + + int v = in[i + j*w]; + if (v != 0) { + int64_t cs = getChunkSeed(ss, i+x, j+z); + if (mcFirstIsZero(cs, 13)) { + cs = mcStepSeed(cs, st); + v |= (1 + mcFirstInt(cs, 15)) << 8 & 0xf00; + // 1 to 1 mapping so 'out' can be overwritten immediately + } + } + out[i + j*w] = v; +} + +__kernel void mapAddMushroomIsland(__constant struct Layer * layers, int layer_id, int4 dims, __constant int* in, __global int* out) { + int seed_offset = layer_id + (int)get_global_id(2) * L_NUM; + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + + int pX = x - 1; + int pZ = z - 1; + int pW = w + 2; + int pH = h + 2; + + // Frame in seed range + in = in + pW * pH * get_global_id(2); + out = out + w * h * get_global_id(2); + + int i = get_global_id(0); + int j = get_global_id(1); + + int64_t ss = layers[seed_offset].startSeed; + int64_t cs; + + int v11 = in[i+1 + (j+1)*pW]; + // surrounded by ocean? 
+ if (v11 == 0 && + !in[i+0 + (j+0)*pW] && !in[i+2 + (j+0)*pW] && + !in[i+0 + (j+2)*pW] && !in[i+2 + (j+2)*pW]) + { + cs = getChunkSeed(ss, i+x, j+z); + if (mcFirstIsZero(cs, 100)) + v11 = mushroom_fields; + } + + out[i + j*w] = v11; +} + +__kernel void mapDeepOcean(__constant struct Layer* layers, int layer_id, int4 dims, __constant int* in, __global int* out) { + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + + int pX = x - 1; + int pZ = z - 1; + int pW = w + 2; + int pH = h + 2; + + // Frame in seed range + in = in + pW * pH * get_global_id(2); + out = out + w * h * get_global_id(2); + + int i = get_global_id(0); + int j = get_global_id(1); + + int v11 = in[(i+1) + (j+1)*pW]; + + if (isShallowOcean(v11)) + { + // count adjacent oceans + int oceans = 0; + if (isShallowOcean(in[(i+1) + (j+0)*pW])) oceans++; + if (isShallowOcean(in[(i+2) + (j+1)*pW])) oceans++; + if (isShallowOcean(in[(i+0) + (j+1)*pW])) oceans++; + if (isShallowOcean(in[(i+1) + (j+2)*pW])) oceans++; + + if (oceans >= 4) + { + switch (v11) + { + case warm_ocean: + v11 = deep_warm_ocean; + break; + case lukewarm_ocean: + v11 = deep_lukewarm_ocean; + break; + case ocean: + v11 = deep_ocean; + break; + case cold_ocean: + v11 = deep_cold_ocean; + break; + case frozen_ocean: + v11 = deep_frozen_ocean; + break; + default: + v11 = deep_ocean; + } + } + } + out[i + j*w] = v11; +} + +__constant int warmBiomes[] = {desert, desert, desert, savanna, savanna, plains}; +__constant int lushBiomes[] = {forest, dark_forest, mountains, plains, birch_forest, swamp}; +__constant int coldBiomes[] = {forest, mountains, taiga, plains}; +__constant int snowBiomes[] = {snowy_tundra, snowy_tundra, snowy_tundra, snowy_taiga}; + +__kernel void mapBiomes(__constant struct Layer* layers, int layer_id, int4 dims, __constant struct Biome* biomes, __constant int* in, __global int* out) { + int seed_offset = layer_id + (int)get_global_id(2) * L_NUM; + int x = dims.s0; + int z = dims.s1; + int w = 
dims.s2; + int h = dims.s3; + + // Frame in seed range + in = in + w * h * get_global_id(2); + out = out + w * h * get_global_id(2); + + int i = get_global_id(0); + int j = get_global_id(1); + + int64_t ss = layers[seed_offset].startSeed; + int64_t cs; + + int idx = i + j*w; + int id = in[idx]; + int hasHighBit = (id & 0xf00); + id &= ~0xf00; + + if (getBiomeType(biomes, id) == Ocean || id == mushroom_fields) { + out[idx] = id; + } else { + cs = getChunkSeed(ss, i + x, j + z); + switch (id) + { + case Warm: + if (hasHighBit) out[idx] = mcFirstIsZero(cs, 3) ? badlands_plateau : wooded_badlands_plateau; + else out[idx] = warmBiomes[mcFirstInt(cs, 6)]; + break; + case Lush: + if (hasHighBit) out[idx] = jungle; + else out[idx] = lushBiomes[mcFirstInt(cs, 6)]; + break; + case Cold: + if (hasHighBit) out[idx] = giant_tree_taiga; + else out[idx] = coldBiomes[mcFirstInt(cs, 4)]; + break; + case Freezing: + out[idx] = snowBiomes[mcFirstInt(cs, 4)]; + break; + default: + out[idx] = mushroom_fields; + } + } +} + +__kernel void mapAddBamboo(__constant struct Layer* layers, int layer_id, int4 dims, __constant int* in, __global int* out) { + int seed_offset = layer_id + (int)get_global_id(2) * L_NUM; + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + + // Frame in seed range + in = in + w * h * get_global_id(2); + out = out + w * h * get_global_id(2); + + int i = get_global_id(0); + int j = get_global_id(1); + + int64_t ss = layers[seed_offset].startSeed; + int64_t cs; + + int idx = i + j*w; + int v = in[idx]; + if (v == jungle) { + cs = getChunkSeed(ss, i + x, j + z); + if (mcFirstIsZero(cs, 10)) { + v = bamboo_jungle; + } + } + out[idx] = v; +} + +__kernel void mapBiomeEdge(__constant struct Layer* layers, int layer_id, int4 dims, __constant struct Biome* biomes, __constant int* in, __global int* out) { + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + + int pX = x - 1; + int pZ = z - 1; + int pW = w + 2; + int pH = h + 2; 
+ + // Frame in seed range + in = in + pW * pH * get_global_id(2); + out = out + w * h * get_global_id(2); + + int i = get_global_id(0); + int j = get_global_id(1); + + int v11 = in[(i+1) + (j+1)*pW]; + int v10 = in[(i+1) + j*pW]; + int v21 = in[(i+2) + (j+1)*pW]; + int v01 = in[(i+0) + (j+1)*pW]; + int v12 = in[(i+1) + (j+2)*pW]; + + if (!replaceEdge(biomes, out, i + j*w, v10, v21, v01, v12, v11, wooded_badlands_plateau, badlands) && + !replaceEdge(biomes, out, i + j*w, v10, v21, v01, v12, v11, badlands_plateau, badlands) && + !replaceEdge(biomes, out, i + j*w, v10, v21, v01, v12, v11, giant_tree_taiga, taiga)) + { + if (v11 == desert) + { + if (v10 != snowy_tundra && v21 != snowy_tundra && v01 != snowy_tundra && v12 != snowy_tundra) + { + out[i + j*w] = v11; + } + else + { + out[i + j*w] = wooded_mountains; + } + } + else if (v11 == swamp) + { + if (v10 != desert && v21 != desert && v01 != desert && v12 != desert && + v10 != snowy_taiga && v21 != snowy_taiga && v01 != snowy_taiga && v12 != snowy_taiga && + v10 != snowy_tundra && v21 != snowy_tundra && v01 != snowy_tundra && v12 != snowy_tundra) + { + if (v10 != jungle && v12 != jungle && v21 != jungle && v01 != jungle && + v10 != bamboo_jungle && v12 != bamboo_jungle && + v21 != bamboo_jungle && v01 != bamboo_jungle) + out[i + j*w] = v11; + else + out[i + j*w] = jungle_edge; + } + else + { + out[i + j*w] = plains; + } + } + else + { + out[i + j*w] = v11; + } + } +} + +__kernel void mapRiverInit(__constant struct Layer* layers, int layer_id, int4 dims, __constant int* in, __global int* out) { + int seed_offset = layer_id + (int)get_global_id(2) * L_NUM; + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + + // Frame in seed range + in = in + w * h * get_global_id(2); + out = out + w * h * get_global_id(2); + + int i = get_global_id(0); + int j = get_global_id(1); + + int64_t ss = layers[seed_offset].startSeed; + int64_t cs; + + if (in[i + j*w] > 0) { + cs = getChunkSeed(ss, i + x, j + z); 
+ out[i + j*w] = mcFirstInt(cs, 299999)+2; + } else { + out[i + j*w] = 0; + } +} + +__kernel void mapHills13(__constant struct Layer* layers, int layer_id, int4 dims, __constant struct Biome* biomes, __constant int* in1, __constant int* in2, __global int* out) { + int seed_offset = layer_id + (int)get_global_id(2) * L_NUM; + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + + int pX = x - 1; + int pZ = z - 1; + int pW = w + 2; + int pH = h + 2; + + // Frame in seed range + in2 = in2 + pW * pH * get_global_id(2); + in1 = in1 + pW * pH * get_global_id(2); + out = out + w * h * get_global_id(2); + + int i = get_global_id(0); + int j = get_global_id(1); + + int64_t st = layers[seed_offset].startSalt; + int64_t ss = layers[seed_offset].startSeed; + int64_t cs; + + int a11 = in1[i+1 + (j+1)*pW]; // biome branch + int b11 = in2[i+1 + (j+1)*pW]; // river branch + int idx = i + j*w; + + int bn = (b11 - 2) % 29; + + if (!isShallowOcean(a11) && b11 >= 2 && bn == 1) + { + int m = biomes[a11].mutated; + if (m > 0) + out[idx] = m; + else + out[idx] = a11; + } + else + { + cs = getChunkSeed(ss, i + x, j + z); + if (bn == 0 || mcFirstIsZero(cs, 3)) + { + int hillID = a11; + + switch(a11) + { + case desert: + hillID = desert_hills; break; + case forest: + hillID = wooded_hills; break; + case birch_forest: + hillID = birch_forest_hills; break; + case dark_forest: + hillID = plains; break; + case taiga: + hillID = taiga_hills; break; + case giant_tree_taiga: + hillID = giant_tree_taiga_hills; break; + case snowy_taiga: + hillID = snowy_taiga_hills; break; + case plains: + cs = mcStepSeed(cs, st); + hillID = mcFirstIsZero(cs, 3) ? 
wooded_hills : forest; break; + case snowy_tundra: + hillID = snowy_mountains; break; + case jungle: + hillID = jungle_hills; break; + case bamboo_jungle: + hillID = bamboo_jungle_hills; break; + case ocean: + hillID = deep_ocean; break; + case mountains: + hillID = wooded_mountains; break; + case savanna: + hillID = savanna_plateau; break; + default: + if (areSimilar113(biomes, a11, wooded_badlands_plateau)) + hillID = badlands; + else if (isDeepOcean(a11)) + { + cs = mcStepSeed(cs, st); + if (mcFirstIsZero(cs, 3)) + { + cs = mcStepSeed(cs, st); + hillID = mcFirstIsZero(cs, 2) ? plains : forest; + } + } + break; + } + + if (bn == 0 && hillID != a11) + { + hillID = biomes[hillID].mutated; + if (hillID < 0) + hillID = a11; + } + + if (hillID != a11) + { + int a10 = in1[i+1 + (j+0)*pW]; + int a21 = in1[i+2 + (j+1)*pW]; + int a01 = in1[i+0 + (j+1)*pW]; + int a12 = in1[i+1 + (j+2)*pW]; + int equals = 0; + + if (areSimilar113(biomes, a10, a11)) equals++; + if (areSimilar113(biomes, a21, a11)) equals++; + if (areSimilar113(biomes, a01, a11)) equals++; + if (areSimilar113(biomes, a12, a11)) equals++; + + if (equals >= 3) + out[idx] = hillID; + else + out[idx] = a11; + } + else + { + out[idx] = a11; + } + } + else + { + out[idx] = a11; + } + } + +} + +__kernel void mapRareBiome(__constant struct Layer* layers, int layer_id, int4 dims, __constant int* in, __global int* out) { + int seed_offset = layer_id + (int)get_global_id(2) * L_NUM; + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + + // Frame in seed range + in = in + w * h * get_global_id(2); + out = out + w * h * get_global_id(2); + + int i = get_global_id(0); + int j = get_global_id(1); + + int64_t ss = layers[seed_offset].startSeed; + int64_t cs; + + int v = in[i + j * w]; + + if (v == plains) + { + cs = getChunkSeed(ss, i + x, j + z); + if (mcFirstIsZero(cs, 57)) + { + // Sunflower Plains + v = plains + 128; + } + } + out[i + j*w] = v; +} + +inline static int isBiomeJFTO(__constant 
struct Biome* biomes, int id) +{ + return biomeExists(biomes, id) && (getBiomeType(biomes, id) == Jungle || id == forest || id == taiga || isOceanic(id)); +} + +static inline int isBiomeSnowy(__constant struct Biome* biomes, int id) +{ + return biomeExists(biomes, id) && biomes[id].temp < 0.1; +} + +inline static int replaceOcean(__global int *out, int idx, int v10, int v21, int v01, int v12, int id, int replaceID) +{ + if (isOceanic(id)) return 0; + + if (!isOceanic(v10) && !isOceanic(v21) && !isOceanic(v01) && !isOceanic(v12)) + out[idx] = id; + else + out[idx] = replaceID; + + return 1; +} + +__kernel void mapShore(__constant struct Layer* layers, int layer_id, int4 dims, __constant struct Biome* biomes, __constant int* in, __global int* out) { + int x = dims.s0; + int z = dims.s1; + int w = dims.s2; + int h = dims.s3; + + int i = get_global_id(0); + int j = get_global_id(1); + + int pX = x - 1; + int pZ = z - 1; + int pW = w + 2; + int pH = h + 2; + + // Frame in seed range + in = in + pW * pH * get_global_id(2); + out = out + w * h * get_global_id(2); + + int v11 = in[(j+1) * pW + i+1]; + int v10 = in[j * pW + i+1]; + int v21 = in[(j+1) * pW + i+2]; + int v01 = in[(j+1) * pW + i+0]; + int v12 = in[(j+2) * pW + i+1]; + + int biome = biomeExists(biomes, v11) ? 
v11 : 0; + + if (v11 == mushroom_fields) + { + if (v10 != ocean && v21 != ocean && v01 != ocean && v12 != ocean) + out[i + j*w] = v11; + else + out[i + j*w] = mushroom_field_shore; + } + else if (/*biome < 128 &&*/ getBiomeType(biomes, biome) == Jungle) + { + if (isBiomeJFTO(biomes, v10) && isBiomeJFTO(biomes, v21) && isBiomeJFTO(biomes, v01) && isBiomeJFTO(biomes, v12)) + { + if (!isOceanic(v10) && !isOceanic(v21) && !isOceanic(v01) && !isOceanic(v12)) + out[i + j*w] = v11; + else + out[i + j*w] = beach; + } + else + { + out[i + j*w] = jungle_edge; + } + } + else if (v11 != mountains && v11 != wooded_mountains && v11 != mountain_edge) + { + if (isBiomeSnowy(biomes, biome)) + { + replaceOcean(out, i + j*w, v10, v21, v01, v12, v11, snowy_beach); + } + else if (v11 != badlands && v11 != wooded_badlands_plateau) + { + if (v11 != ocean && v11 != deep_ocean && v11 != river && v11 != swamp) + { + if (!isOceanic(v10) && !isOceanic(v21) && !isOceanic(v01) && !isOceanic(v12)) + out[i + j*w] = v11; + else + out[i + j*w] = beach; + } + else + { + out[i + j*w] = v11; + } + } + else + { + if (!isOceanic(v10) && !isOceanic(v21) && !isOceanic(v01) && !isOceanic(v12)) + { + if (getBiomeType(biomes, v10) == Mesa && getBiomeType(biomes, v21) == Mesa && getBiomeType(biomes, v01) == Mesa && getBiomeType(biomes, v12) == Mesa) + out[i + j*w] = v11; + else + out[i + j*w] = desert; + } + else + { + out[i + j*w] = v11; + } + } + } + else + { + replaceOcean(out, i + j*w, v10, v21, v01, v12, v11, stone_shore); + } +} \ No newline at end of file diff --git a/ocl_tests.c b/ocl_tests.c new file mode 100644 index 00000000..51efd8c3 --- /dev/null +++ b/ocl_tests.c @@ -0,0 +1,104 @@ +#include "ocl_generator.h" +#include "generator.h" +#include + +#define W 256 +#define H 256 + +#define MAX_ERRORS 128 +#define SEED_RANGE 32 +#define START_SEED 1 + +struct Error { + int layer; + int seed; + int x; + int z; + int ocl; + int expected; +}; + +cl_int test_layer(struct GeneratorContext* context, 
LayerStack* stack, struct Error* errors, int* error_count, int* bufferB, int layer) { + cl_int err; + cl_event event; + + cl_int4 dims = {{0, 0, W, H}}; + err = generate_layer(context, layer, dims, SEED_RANGE, bufferB, NULL, &event); + if (err < 0) { + printf("layer=%d err=%d", layer, err); + return err; + } + err = clWaitForEvents(1, &event); + if (err < 0) { + printf("layer=%d err=%d", layer, err); + return err; + } + int* bufferA = allocCache(&stack->layers[layer], W, H); + for (int s = 0; s < SEED_RANGE; ++s) { + applySeed(stack, START_SEED + s); + genArea(&stack->layers[layer], bufferA, dims.s0, dims.s1, W, H); + for (int j = 0; j < H && *error_count <= MAX_ERRORS; ++j) { + for (int i = 0; i < W && *error_count <= MAX_ERRORS; ++i) { + if (bufferA[i + j*W] != bufferB[i + j*W + s*W*H]) { + errors[*error_count].seed = START_SEED + s; + errors[*error_count].layer = layer; + errors[*error_count].x = i; + errors[*error_count].z = j; + errors[*error_count].expected = bufferA[i + j*W + s*W*H]; + errors[*error_count].ocl = bufferB[i + j*W + s*W*H]; + (*error_count) += 1; + } + } + } + } + free(bufferA); + if (*error_count == 0) { + printf("Layer %d is good. 
:)\n", layer); + } + return CL_SUCCESS; +} + +cl_int run_tests(struct Error* errors, int* error_count) { + initBiomes(); + LayerStack stack; + setupGenerator(&stack, MC_1_16); + + struct GeneratorContext context; + cl_int err = init_generator_context(&context, MC_1_16, SEED_RANGE, W, H); + if (err < 0) return err; + cl_event event0; + set_world_seed(&context, START_SEED, &event0); + clWaitForEvents(1, &event0); + + cl_int* bufferB = (cl_int*) malloc(SEED_RANGE * W * H * sizeof(cl_int)); + + *error_count = 0; + for (int layer=0; layer <= L_BIOME_256; ++layer) { + test_layer(&context, &stack, errors, error_count, bufferB, layer); + } + test_layer(&context, &stack, errors, error_count, bufferB, L14_BAMBOO_256); + for (int layer=L_ZOOM_128; layer <= L_SHORE_16; ++layer) { + test_layer(&context, &stack, errors, error_count, bufferB, layer); + } + + free(bufferB); + release_generator_context(&context); + return err; +} + +int main() { + struct Error errors[MAX_ERRORS]; + int error_count = 0; + cl_int err = run_tests(errors, &error_count); + if (err < 0) { + printf("CL Error: %d\n", err); + } + if (error_count > 0) { + printf("At least %d value errors encountered:\n", error_count); + for (int i = 0; i < error_count; ++i) { + printf("seed=%d x=%d z=%d \t| expected=%d \treal=%d\n", errors[i].seed, errors[i].x, errors[i].z, errors[i].expected, errors[i].ocl); + } + } else if (err == 0) { + printf("No errors.\n :)\n"); + } +} \ No newline at end of file From dd4663f11255de19c705d16dc1ef7cc1f58beca3 Mon Sep 17 00:00:00 2001 From: hukumka Date: Sat, 3 Oct 2020 09:39:39 +0000 Subject: [PATCH 2/2] Fix bug with tests failing on SEED_RANGE>H --- ocl_tests.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ocl_tests.c b/ocl_tests.c index 51efd8c3..6087063a 100644 --- a/ocl_tests.c +++ b/ocl_tests.c @@ -4,11 +4,13 @@ #define W 256 #define H 256 - -#define MAX_ERRORS 128 +// Number of seeds to be generated in single call to `generate_layer` #define 
SEED_RANGE 32 +// Test seeds [1..32] +#define START_SEED 1 + +#define MAX_ERRORS 128 + struct Error { int layer; int seed; int x; int z; @@ -44,7 +46,7 @@ cl_int test_layer(struct GeneratorContext* context, LayerStack* stack, struct Er errors[*error_count].layer = layer; errors[*error_count].x = i; errors[*error_count].z = j; - errors[*error_count].expected = bufferA[i + j*W + s*W*H]; + errors[*error_count].expected = bufferA[i + j*W]; errors[*error_count].ocl = bufferB[i + j*W + s*W*H]; (*error_count) += 1; } @@ -64,7 +66,7 @@ cl_int run_tests(struct Error* errors, int* error_count) { setupGenerator(&stack, MC_1_16); struct GeneratorContext context; - cl_int err = init_generator_context(&context, MC_1_16, SEED_RANGE, W, H); + cl_int err = init_generator_context(&context, MC_1_16, W, H, SEED_RANGE); if (err < 0) return err; cl_event event0; set_world_seed(&context, START_SEED, &event0);