Hey everyone! I've been working on a particle life compute shader this week. It's been going fairly well and I've been learning a lot in the process (still wrapping my head around GLSL), but I've just implemented spatial binning and some of the bugs are beyond my understanding! I feel like I must be doing something fundamentally wrong, because the individual problems feel kind of "magic," and unpredictable in that I don't understand them at all. Any help/pointers would be massively appreciated!

A few of the problems:

Particles seem to stick to the edges of the bins:

When the bin size has certain values (I can't work out which), certain sides of the simulation break. My default (which doesn't display this issue) is 400.0:

When using 80000 rather than 8000 particles, the lines are really visible. More strangely, some of the particles act fairly normally but the majority don't really seem to move at all. They just kinda sit there:

At 80000, often the top left section randomly behaves normally while the rest stays pretty static:

This is my compute shader:

#[compute]
#version 450

layout(local_size_x = 8, local_size_y = 1, local_size_z = 1) in;

layout(set = 0, binding = 0, std430) restrict buffer FloatParameters {
	float beta;
	float bin_size;
	float dt;
	float image_width;
	float interaction_strength;
	float r_max;
	float t_half;
	float window_width;
	float window_height;
};

layout(set = 0, binding = 1, std430) restrict buffer IntegerParameters {
	int num_bins;
	int num_bins_x;
	int num_bins_y;
	int num_colours;
	int num_particles;
};

layout(set = 0, binding = 2, std430) restrict buffer BinSum {
	int bin_sum[];
};

layout(set = 0, binding = 3, std430) restrict buffer PrefixSum {
	int prefix_sum[];
};

layout(set = 0, binding = 4, std430) restrict buffer ReindexTracker {
	int reindex_tracker[];
};

layout(set = 0, binding = 5, std430) restrict buffer Reindex {
	int reindex[];
};

layout(set = 0, binding = 6, std430) restrict buffer ColourInteractions {
	float colour_interactions[];
};

layout(set = 0, binding = 7, std430) restrict buffer ParticlePositions {
	vec2 particle_positions[];
};

layout(set = 0, binding = 8, std430) restrict buffer ParticleVelocities {
	vec2 particle_velocities[];
};

layout(set = 0, binding = 9, std430) restrict buffer ParticleColours {
	int particle_colours[];
};

layout(rgba32f, binding = 10) uniform image2D particle_data;

ivec2 indexToTexelPosition(int index) {
	int texel_x = int(mod(index, int(image_width)));
	int texel_y = index / int(image_width);
	return ivec2(texel_x, texel_y);
}

void main() {
	// Returns if the index is greater than the number of particles
	int index = int(gl_GlobalInvocationID.x);
	if (index >= num_particles) {
		return;
	}

	// Fetches this particle's properties
	vec2 position = particle_positions[index];
	vec2 velocity = particle_velocities[index];
	int colour = particle_colours[index];

	// Calculates this particle's bin index using its position and the bin size
	int bin_row = int(mod(position.y / bin_size + num_bins_y, num_bins_y));
	int bin_col = int(mod(position.x / bin_size + num_bins_x, num_bins_x));
	int bin_index = bin_row * num_bins_x + bin_col;

	// First "num_bins" indices help reset the bin sum array
	if (index < num_bins) {
		bin_sum[index] = 0;
	}

	// -------------------
	memoryBarrierBuffer();
	// -------------------

	// Increments the "bin_index" position of the bin sum array by 1
	atomicAdd(bin_sum[bin_index], 1);

	// -------------------
	memoryBarrierBuffer();
	// -------------------

	// First "num_bins" indices reset and then fill the prefix sum array
	if (index < num_bins) {
		prefix_sum[index] = 0;

		for (int i = 0; i <= index; i++) {
			prefix_sum[index] += bin_sum[i];
		}
	}

	// -------------------
	memoryBarrierBuffer();
	// -------------------

	// First "num_bins" fill the reindex tracker by shifting the prefix sum array
	if (index < num_bins) {
		reindex_tracker[index] = 0;
		if (index > 0) {
			reindex_tracker[index] = prefix_sum[index - 1];
		}
	}

	// -------------------
	memoryBarrierBuffer();
	// -------------------

	// Fill the reindex array with all particle indices in "bin_index" order
	int last_index = atomicAdd(reindex_tracker[bin_index], 1);
	reindex[last_index] = index;

	// -------------------
	memoryBarrierBuffer();
	// -------------------

	// Finds the indices and position offsets of the 9 local bins
	int local_bin_indices[9];
	vec2 local_bin_offsets[9];

	for (int y = -1; y < 2; y++) {
		for (int x = -1; x < 2; x++) {
			int array_index = (y + 1) * 3 + (x + 1);
			vec2 offset = vec2(0.0, 0.0);

			if (bin_row + y < 0) {
				offset.y = window_height;
			} else if (bin_row + y >= num_bins_y) {
				offset.y = -window_height;
			}
			if (bin_col + x < 0) {
				offset.x = window_width;
			} else if (bin_col + x >= num_bins_x) {
				offset.x = -window_width;
			}
			local_bin_offsets[array_index] = offset;

			int new_row = int(mod(float(bin_row + y + num_bins_y), float(num_bins_y)));
			int new_col = int(mod(float(bin_col + x + num_bins_x), float(num_bins_x)));
			int local_bin = new_row * num_bins_x + new_col;
			local_bin_indices[array_index] = local_bin;
		}
	}

	// Calculates the total force on the particle from particles in adjacent bins
	vec2 force = vec2(0.0, 0.0);

	for (int i = 0; i < 9; i++) {
		int adj_index = local_bin_indices[i];
		vec2 adj_offset = local_bin_offsets[i];

		for (int j = prefix_sum[adj_index - 1]; j < prefix_sum[adj_index]; j++) {
			int other_index = reindex[j];

			if (index != other_index) {
				vec2 other_pos = particle_positions[other_index] - adj_offset;
				int other_colour = particle_colours[other_index];
				float a = colour_interactions[colour * num_colours + other_colour];
				vec2 diff = other_pos - position;
				vec2 dir = normalize(diff);
				highp float r = length(diff) / r_max;

				if (r < beta) {
					force += 1.2 * (r / beta - 1.0) * dir;
				} else if (r >= beta && r < 1.0) {
					force += a * (1.0 - abs(2.0 * r - 1.0 - beta) / (1.0 - beta)) * dir;
				}
			}
		}
	}
	force *= r_max * interaction_strength;

	// Applies friction to the particle's local velocity variable
	velocity *= pow(0.5, dt / t_half);

	// Applies force and then velocity to the local particle variables
	velocity += force * dt;
	position += velocity * dt;

	// Applies screen-wrapping to the local particle variables
	position.x = mod(position.x + window_width, window_width);
	position.y = mod(position.y + window_height, window_height);

	// Sets the colour of this particle's pixel in the particle data image
	vec4 texel = vec4(position.x, position.y, colour, 0.0);
	int texel_x = int(mod(index, int(image_width)));
	int texel_y = index / int(image_width);
	ivec2 texel_position = ivec2(texel_x, texel_y);
	imageStore(particle_data, texel_position, texel);

	// Copies the updated local particle variables back into the buffers
	particle_positions[index] = position;
	particle_velocities[index] = velocity;
}

And this is my script:

extends Node


# Interface variables
var _beta := 0.3 # Default: 0.3
var _bin_size := 400.0 # Default: 400.0
var _dt := 0.005 # Default: 0.005
var _r_max := 200.0 # Default: 200.0
var _interaction_strength := 30.0 # Default: 30.0
var _t_half := 0.01 # Default: 0.01
var _num_colours := 6
var _num_particles := 8000 # Default: 8000

# Compute shader variables
var _rd := RenderingServer.create_local_rendering_device()
var _float_parameters_buffer: RID
var _integer_parameters_buffer: RID
var _particle_colours_buffer: RID
var _particle_data_buffer: RID
var _bin_sum_buffer: RID
var _prefix_sum_buffer: RID
var _reindex_tracker_buffer: RID
var _reindex_buffer: RID
var _colour_interactions_buffer: RID
var _particle_positions_buffer: RID
var _particle_velocities_buffer: RID
var _pipeline: RID
var _shader: RID
var _uniform_set: RID

# Other variables
var _float_parameters_uniform: RDUniform
var _simulation_reset_pending := false
var _binding: Array
var _particle_data: Image
var _particle_data_texture: ImageTexture
var _image_width: int
var _window_size := Vector2(3840.0, 2400.0)

@onready var particle_display := $ParticleDisplay


func _ready():
	_initialise_compute_shader()


func _process(_delta):
	%FPSLabel.text = str(min(Engine.get_frames_per_second(), 60))
	_update_particles()


func _initialise_compute_shader():
	_image_width = ceili(sqrt(_num_particles))
	_particle_data = Image.create(_image_width, _image_width, false, Image.FORMAT_RGBAF)
	_particle_data_texture = ImageTexture.create_from_image(_particle_data)
	particle_display.amount = _num_particles
	particle_display.process_material.set_shader_parameter("particleData", _particle_data_texture)

	# Create shader pipeline
	var shader_file := load("res://shaders/particle_simulation.glsl")
	_shader = _rd.shader_create_from_spirv(shader_file.get_spirv())
	_pipeline = _rd.compute_pipeline_create(_shader)

	# Create float parameters buffer
	_float_parameters_buffer = _create_float_parameters_buffer()

	# Create int parameters buffer
	var num_bins_x := ceili(_window_size.x / _bin_size)
	var num_bins_y := ceili(_window_size.y / _bin_size)
	var num_bins := num_bins_x * num_bins_y
	var integer_parameters_buffer_bytes := PackedInt32Array([
		num_bins,
		num_bins_x,
		num_bins_y,
		_num_colours,
		_num_particles,
	]).to_byte_array()
	_integer_parameters_buffer = _rd.storage_buffer_create(integer_parameters_buffer_bytes.size(), integer_parameters_buffer_bytes)

	# Create bin sum buffer
	var bin_sum_buffer_data := []
	bin_sum_buffer_data.resize(num_bins)
	_bin_sum_buffer = _create_int_array_buffer(bin_sum_buffer_data)

	# Create prefix sum buffer
	var prefix_sum_buffer_data := []
	prefix_sum_buffer_data.resize(num_bins)
	_prefix_sum_buffer = _create_int_array_buffer(prefix_sum_buffer_data)

	# Create reindex tracker buffer
	var reindex_tracker_buffer_data := []
	reindex_tracker_buffer_data.resize(num_bins)
	var reindex_tracker_buffer_bytes := PackedInt32Array(reindex_tracker_buffer_data).to_byte_array()
	_reindex_tracker_buffer = _rd.storage_buffer_create(reindex_tracker_buffer_bytes.size(), reindex_tracker_buffer_bytes)

	# Create reindex buffer
	var reindex_buffer_data := []
	reindex_buffer_data.resize(_num_particles)
	var reindex_buffer_bytes := PackedInt32Array(reindex_buffer_data).to_byte_array()
	_reindex_buffer = _rd.storage_buffer_create(reindex_buffer_bytes.size(), reindex_buffer_bytes)

	# Create colour interactions buffer
	var colour_interactions := []
	for i in _num_colours * _num_colours:
		colour_interactions.append(randf() * 2.0 - 1.0)
	var colour_interactions_bytes := PackedFloat32Array(colour_interactions).to_byte_array()
	_colour_interactions_buffer = _rd.storage_buffer_create(colour_interactions_bytes.size(), colour_interactions_bytes)

	# Create particle positions buffer
	var particle_positions := []
	for i in _num_particles:
		particle_positions.append(Vector2(randf() * _window_size.x, randf() * _window_size.y))
	_particle_positions_buffer = _create_vec2_array_buffer(particle_positions)

	# Create particle velocities buffer
	var particle_velocities := []
	for i in _num_particles:
		particle_velocities.append(Vector2.ZERO)
	_particle_velocities_buffer = _create_vec2_array_buffer(particle_velocities)

	# Create particle colours buffer
	var particle_colours := []
	for i in _num_particles:
		particle_colours.append(randi_range(0, _num_colours - 1))
	_particle_colours_buffer = _create_int_array_buffer(particle_colours)

	# Create rgba32f image format for particle data
	var rgba32f := RDTextureFormat.new()
	rgba32f.width = _image_width
	rgba32f.height = _image_width
	rgba32f.format = RenderingDevice.DATA_FORMAT_R32G32B32A32_SFLOAT
	rgba32f.usage_bits = \
			RenderingDevice.TEXTURE_USAGE_CAN_UPDATE_BIT | \
			RenderingDevice.TEXTURE_USAGE_SAMPLING_BIT | \
			RenderingDevice.TEXTURE_USAGE_STORAGE_BIT | \
			RenderingDevice.TEXTURE_USAGE_CAN_COPY_FROM_BIT

	# Create particle data buffer
	_particle_data_buffer = _rd.texture_create(rgba32f, RDTextureView.new(),
			[_particle_data.get_data()])

	# Create uniforms

	# Create uniform set
	_float_parameters_uniform = _create_uniform(_float_parameters_buffer, RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER, 0)
	_binding = [
		_float_parameters_uniform,
		_create_uniform(_integer_parameters_buffer, RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER, 1),
		_create_uniform(_bin_sum_buffer, RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER, 2),
		_create_uniform(_prefix_sum_buffer, RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER, 3),
		_create_uniform(_reindex_tracker_buffer, RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER, 4),
		_create_uniform(_reindex_buffer, RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER, 5),
		_create_uniform(_colour_interactions_buffer, RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER, 6),
		_create_uniform(_particle_positions_buffer, RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER, 7),
		_create_uniform(_particle_velocities_buffer, RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER, 8),
		_create_uniform(_particle_colours_buffer, RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER, 9),
		_create_uniform(_particle_data_buffer, RenderingDevice.UNIFORM_TYPE_IMAGE, 10),
	]
	_uniform_set = _rd.uniform_set_create(_binding, _shader, 0)


func _update_particles():
	_rd.free_rid(_float_parameters_buffer)
	_float_parameters_buffer = _create_float_parameters_buffer()
	_float_parameters_uniform.clear_ids()
	_float_parameters_uniform.add_id(_float_parameters_buffer)
	_uniform_set = _rd.uniform_set_create(_binding, _shader, 0)

	if _simulation_reset_pending:
		pass

	var compute_list := _rd.compute_list_begin()
	_rd.compute_list_bind_compute_pipeline(compute_list, _pipeline)
	_rd.compute_list_bind_uniform_set(compute_list, _uniform_set, 0)
	_rd.compute_list_dispatch(compute_list, ceili(_num_particles / 8.0), 1, 1)
	_rd.compute_list_end()

	_rd.submit()
	_rd.sync()

	var particle_data_image_data := _rd.texture_get_data(_particle_data_buffer, 0)
	_particle_data.set_data(_image_width, _image_width, false, Image.FORMAT_RGBAF, particle_data_image_data)
	_particle_data_texture.update(_particle_data)


func _create_float_parameters_buffer() -> RID:
	var buffer_data := [
		_beta,
		_bin_size,
		_dt,
		float(_image_width),
		_interaction_strength,
		_r_max,
		_t_half,
		_window_size.x,
		_window_size.y,
	]
	var buffer_bytes := PackedFloat32Array(buffer_data).to_byte_array()
	return _rd.storage_buffer_create(buffer_bytes.size(), buffer_bytes)


func _create_int_array_buffer(data: Array) -> RID:
	var buffer_bytes := PackedInt32Array(data).to_byte_array()
	return _rd.storage_buffer_create(buffer_bytes.size(), buffer_bytes)


func _create_uniform(buffer: RID, uniform_type, binding: int) -> RDUniform:
	var uniform := RDUniform.new()
	uniform.add_id(buffer)
	uniform.uniform_type = uniform_type
	uniform.binding = binding
	return uniform


func _create_vec2_array_buffer(data: Array) -> RID:
	var buffer_bytes := PackedVector2Array(data).to_byte_array()
	return _rd.storage_buffer_create(buffer_bytes.size(), buffer_bytes)


func _exit_tree():
	_rd.free_rid(_float_parameters_buffer)
	_rd.free_rid(_integer_parameters_buffer)
	_rd.free_rid(_bin_sum_buffer)
	_rd.free_rid(_prefix_sum_buffer)
	_rd.free_rid(_reindex_tracker_buffer)
	_rd.free_rid(_reindex_buffer)
	_rd.free_rid(_colour_interactions_buffer)
	_rd.free_rid(_particle_positions_buffer)
	_rd.free_rid(_particle_velocities_buffer)
	_rd.free_rid(_particle_colours_buffer)
	_rd.free_rid(_particle_data_buffer)
	_rd.free_rid(_pipeline)
	_rd.free_rid(_shader)
	_rd.free_rid(_uniform_set)
	_rd.free()

The particle data image is sent to a particle shader, which just reads the positions and colours and renders them.

I don't really expect any poor soul to read through all the code, but, I'm just tearing my hair out! The only hunch I have is that it's some kind of variable precision problem, but, yeah. Please help. 💗

Smaller sub-problem that might be easier to get help with. This shader code:

// First "num_bins" indices help reset the bin sum array
if (index < num_bins) {
	bin_sum[index] = 0;
}

// -------------------
memoryBarrierBuffer();
// -------------------

if (bin_sum[0] != 0) {
	particles[index].colour = 3;
}

Changes the colour of all pixels on the screen (when it should change none, because the bin sum buffer's just been reset. "index" goes from 0 to 7999 atm, so it's not as if index = 0 doesn't occur.