diff options
-rw-r--r-- | bin/world.py | 2 | ||||
-rw-r--r-- | include/memory.h | 13 | ||||
-rw-r--r-- | include/render.h | 26 | ||||
-rw-r--r-- | src/memory.c | 48 | ||||
-rw-r--r-- | src/render.c | 93 |
5 files changed, 120 insertions, 62 deletions
diff --git a/bin/world.py b/bin/world.py index 17b640f..15cdd21 100644 --- a/bin/world.py +++ b/bin/world.py @@ -41,7 +41,7 @@ class World: line_width = self.__printer.size[1] - self.PADDING print_area = self.__printer.size[0] * line_width c_buffer = (c_uint8 * print_area)() - self.__sim.lib.sal_mem_render_image( + self.__sim.lib.sal_ren_get_image( self.pos, self.zoom, print_area, cast(c_buffer, POINTER(c_uint8)) ) diff --git a/include/memory.h b/include/memory.h index 04284e7..6376d7c 100644 --- a/include/memory.h +++ b/include/memory.h @@ -82,19 +82,6 @@ SALIS_API void sal_mem_set_inst(uint32 address, uint8 inst); */ SALIS_API uint8 sal_mem_get_byte(uint32 address); -/** Render a 1D image of a given block of memory. This is useful, as rendering -* directly in python would be too slow. We use openmp for multi-threaded image -* generation. -* -* @param origin Low bound of rendered image -* @param cell_size Amount of bytes per rendered pixel (cell) -* @param buff_size Amount of pixels (cells) to be generated -* @param buffer Pre-allocated buffer to store the rendered pixels into -*/ -SALIS_API void sal_mem_render_image( - uint32 origin, uint32 cell_size, uint32 buff_size, uint8_p buffer -); - void _sal_mem_cycle(void); #endif diff --git a/include/render.h b/include/render.h new file mode 100644 index 0000000..85e9935 --- /dev/null +++ b/include/render.h @@ -0,0 +1,26 @@ +/** +* @file render.h +* @author Paul Oliver +* +* This module implements a multi-threaded memory render function that iterates +* over a given area of memory and returns a 1D image. OpenMP is used to improve +* performance. +*/ + +#ifndef SALIS_RENDER_H +#define SALIS_RENDER_H + +/** Render a 1D image of a given block of memory. This is useful, as rendering +* directly in python would be too slow. We use openmp for multi-threaded image +* generation.
+* +* @param origin Low bound of rendered image +* @param cell_size Amount of bytes per rendered pixel (cell) +* @param buff_size Amount of pixels (cells) to be generated +* @param buffer Pre-allocated buffer to store the rendered pixels into +*/ +SALIS_API void sal_ren_get_image( + uint32 origin, uint32 cell_size, uint32 buff_size, uint8_p buffer +); + +#endif diff --git a/src/memory.c b/src/memory.c index 1036152..b02a5b9 100644 --- a/src/memory.c +++ b/src/memory.c @@ -7,8 +7,6 @@ #include "instset.h" #include "memory.h" -#define MAX_ZOOM 0x10000 - static boolean g_is_init; static uint32 g_order; static uint32 g_size; @@ -185,52 +183,6 @@ uint8 sal_mem_get_byte(uint32 address) return g_memory[address]; } -void sal_mem_render_image( - uint32 origin, uint32 cell_size, uint32 buff_size, uint8_p buffer -) { - /* Render a 1D image of a given section of memory, at a given resolution - (zoom) and store it in a pre-allocated 'buffer'. - - On the Salis python handler we draw memory as a 1D 'image' on the WORLD - page. If we were to render this image directly on python, it would be - excruciatingly slow, as we have to iterate over large areas of memory! - Therefore, this memory module comes with a built-in, super fast renderer. - */ - uint32 i; - assert(g_is_init); - assert(sal_mem_is_address_valid(origin)); - assert(cell_size); - assert(cell_size <= MAX_ZOOM); - assert(buff_size); - assert(buffer); - - /* We make use of openmp for multi-threaded looping. This allows even - faster render times, wherever openmp is supported. 
- */ - #pragma omp parallel for - for (i = 0; i < buff_size; i++) { - uint32 j; - uint32 inst_sum = 0; - uint32 alloc_found = 0; - uint32 cell_addr = origin + (i * cell_size); - - for (j = 0; j < cell_size; j++) { - uint32 address = j + cell_addr; - - if (sal_mem_is_address_valid(address)) { - inst_sum += sal_mem_get_inst(address); - - if (sal_mem_is_allocated(address)) { - alloc_found = ALLOCATED_FLAG; - } - } - } - - buffer[i] = (uint8)(inst_sum / cell_size); - buffer[i] |= (uint8)(alloc_found); - } -} - static boolean inst_count_is_correct(void) { /* Check that the instruction counter is in a valid state diff --git a/src/render.c b/src/render.c new file mode 100644 index 0000000..128ce3b --- /dev/null +++ b/src/render.c @@ -0,0 +1,93 @@ +#include <assert.h> +#include <stdio.h> +#include "types.h" +#include "memory.h" +#include "process.h" +#include "render.h" + +#define MAX_ZOOM 0x10000 +#define BLOCK_FLAG 0x40 +#define IP_FLAG 0x80 + +static void apply_flag( + uint32 origin, uint32 max_pos, uint32 cell_size, uint32 address, + uint32 flag, uint8_p buffer +) { + if (address >= origin && address < max_pos) { + /* Flag falls inside rendered image. We can 'or' the bit into the + corresponding pixel. + */ + uint32 pixel = (address - origin) / cell_size; + buffer[pixel] |= flag; + } +} + +void sal_ren_get_image( + uint32 origin, uint32 cell_size, uint32 buff_size, uint8_p buffer +) { + /* Render a 1D image of a given section of memory, at a given resolution + (zoom) and store it in a pre-allocated 'buffer'. + + On the Salis python handler we draw memory as a 1D 'image' on the WORLD + page. If we were to render this image directly on python, it would be + excruciatingly slow, as we have to iterate over large areas of memory! + Therefore, this render module comes with a built-in, super fast renderer.
+ */ + uint32 i; + uint32 max_pos; + assert(sal_mem_is_address_valid(origin)); + assert(cell_size); + assert(cell_size <= MAX_ZOOM); + assert(buff_size); + assert(buffer); + + /* We make use of openmp for multi-threaded looping. This allows even + faster render times, wherever openmp is supported. + */ + #pragma omp parallel for + for (i = 0; i < buff_size; i++) { + uint32 j; + uint32 inst_sum = 0; + uint32 alloc_flag = 0; + uint32 cell_addr = origin + (i * cell_size); + + for (j = 0; j < cell_size; j++) { + uint32 address = j + cell_addr; + + if (!sal_mem_is_address_valid(address)) { + continue; + } + + inst_sum += sal_mem_get_inst(address); + + if (sal_mem_is_allocated(address)) { + alloc_flag = ALLOCATED_FLAG; + } + } + + buffer[i] = (uint8)(inst_sum / cell_size); + buffer[i] |= (uint8)(alloc_flag); + } + + /* We also iterate through all processes and append extra bit flags to the + rendered image signaling process IP position and memory block limits. + */ + max_pos = origin + (cell_size * buff_size); + + #pragma omp parallel for + for (i = 0; i < sal_proc_get_count(); i++) { + if (!sal_proc_is_free(i)) { + Process proc = sal_proc_get_proc(i); + apply_flag(origin, max_pos, cell_size, proc.ip, IP_FLAG, buffer); + apply_flag( + origin, max_pos, cell_size, proc.mb1a, BLOCK_FLAG, buffer + ); + + if (proc.mb2s) { + apply_flag( + origin, max_pos, cell_size, proc.mb2a, BLOCK_FLAG, buffer + ); + } + } + } +} |