From cb542ac55067f95871f55074d73579375ab11a2c Mon Sep 17 00:00:00 2001 From: Andres Gomez Date: Thu, 2 Aug 2018 18:28:04 +0300 Subject: [PATCH 001/367] Update version to 18.2.0-rc1 Signed-off-by: Andres Gomez --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 9a33c149fca..39b71f2a640 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -18.2.0-devel +18.2.0-rc1 From 2e903df72fcc54b03c113e7a52ac33034dfecf86 Mon Sep 17 00:00:00 2001 From: Vlad Golovkin Date: Wed, 1 Aug 2018 20:56:23 +0300 Subject: [PATCH 002/367] swr: Remove unnecessary memset call Zeroing memory after calloc is not necessary. This also allows to avoid possible crash when allocation fails, because memset is called before checking screen for NULL. Fixes: a29d63ecf71546c4798c6 "swr: refactor swr_create_screen to allow for proper cleanup on error" Reviewed-by: Eric Engestrom (cherry picked from commit 9d3a2394e4d3f91f1e23a971ce34aaf031a1e777) --- src/gallium/drivers/swr/swr_screen.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp index fa232b6838b..67085444f84 100644 --- a/src/gallium/drivers/swr/swr_screen.cpp +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -1148,7 +1148,6 @@ struct pipe_screen * swr_create_screen_internal(struct sw_winsys *winsys) { struct swr_screen *screen = CALLOC_STRUCT(swr_screen); - memset(screen, 0, sizeof(struct swr_screen)); if (!screen) return NULL; From 9ddff68f6fae40308d3a38f0146f4e139e8b2e1b Mon Sep 17 00:00:00 2001 From: Andres Gomez Date: Thu, 2 Aug 2018 19:39:06 +0300 Subject: [PATCH 003/367] intel/tools: add error2aub creation into autotools Tarball distribution is done through "make distcheck". We include the meson targets also into autotools so they won't fail when building from the tarball. 
Fixes: 6a60beba408 ("intel/tools: Add an error state to aub translator") Cc: Jason Ekstrand Cc: Lionel Landwerlin Cc: Dylan Baker Signed-off-by: Andres Gomez Reviewed-by: Lionel Landwerlin Reviewed-by: Dylan Baker (cherry picked from commit 2d4d139877c0aa8efc323be6d9c37c846c8b193a) --- src/intel/Makefile.tools.am | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am index b00cc8cc2cb..00624084e6f 100644 --- a/src/intel/Makefile.tools.am +++ b/src/intel/Makefile.tools.am @@ -21,7 +21,9 @@ noinst_PROGRAMS += \ tools/aubinator \ - tools/aubinator_error_decode + tools/aubinator_error_decode \ + tools/error2aub + tools_aubinator_SOURCES = \ tools/aubinator.c \ @@ -59,3 +61,23 @@ tools_aubinator_error_decode_LDADD = \ tools_aubinator_error_decode_CFLAGS = \ $(AM_CFLAGS) \ $(ZLIB_CFLAGS) + + +tools_error2aub_SOURCES = \ + tools/gen_context.h \ + tools/gen8_context.h \ + tools/gen10_context.h \ + tools/aub_write.h \ + tools/aub_write.c \ + tools/error2aub.c + +tools_error2aub_CFLAGS = \ + $(AM_CFLAGS) \ + $(ZLIB_CFLAGS) + +tools_error2aub_LDADD = \ + dev/libintel_dev.la \ + $(PTHREAD_LIBS) \ + $(DLOPEN_LIBS) \ + $(ZLIB_LIBS) \ + -lm From bbeb78620c02d41bc59a81eacc70a03509ae4f8c Mon Sep 17 00:00:00 2001 From: Mauro Rossi Date: Thu, 2 Aug 2018 21:07:31 +0200 Subject: [PATCH 004/367] android: radv: build vulkan.radv conditionally to radeonsi A problem was reported with arm,arm64 targets build due to missing libLLVM shared library dependency with AOSP; to avoid this issue vulkan.radv is built conditionally only when radeonsi is in BOARD_GPU_DRIVERS Fixes: 0ca153f869 ("android: radv: enable build of vulkan.radv HAL module") Reported-by: John Stultz Signed-off-by: Mauro Rossi Reviewed-by: Emil Velikov Cc: "18.2" (cherry picked from commit 1c7a2433b270afb65f044d0cf49cb67715f50b5b) --- src/amd/Android.mk | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/amd/Android.mk 
b/src/amd/Android.mk index 6129e360cbf..e40e7da01bd 100644 --- a/src/amd/Android.mk +++ b/src/amd/Android.mk @@ -27,4 +27,6 @@ include $(LOCAL_PATH)/Makefile.sources include $(LOCAL_PATH)/Android.addrlib.mk include $(LOCAL_PATH)/Android.common.mk +ifneq ($(filter radeonsi,$(BOARD_GPU_DRIVERS)),) include $(LOCAL_PATH)/vulkan/Android.mk +endif From 88c36f43797b5467267b33a725f1546f60088bc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 17 Jul 2018 01:52:25 -0400 Subject: [PATCH 005/367] gallium/u_vbuf: handle indirect multidraws correctly and efficiently (v3) v2: need to do MAX{start+count} instead of MAX{count} added piglit tests v3: use malloc Cc: 18.2 Reviewed-by: Eric Anholt (cherry picked from commit 0f79b2015bc0c44a8ed470684b6789f0e2e6aa6c) --- src/gallium/auxiliary/util/u_vbuf.c | 204 ++++++++++++++++++++++++---- 1 file changed, 180 insertions(+), 24 deletions(-) diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c index 746ff1085ce..a7a8a3be21b 100644 --- a/src/gallium/auxiliary/util/u_vbuf.c +++ b/src/gallium/auxiliary/util/u_vbuf.c @@ -1131,6 +1131,31 @@ static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr) mgr->dirty_real_vb_mask = 0; } +static void +u_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info, + unsigned *indirect_data, unsigned stride, + unsigned draw_count) +{ + assert(info->index_size); + info->indirect = NULL; + + for (unsigned i = 0; i < draw_count; i++) { + unsigned offset = i * stride / 4; + + info->count = indirect_data[offset + 0]; + info->instance_count = indirect_data[offset + 1]; + + if (!info->count || !info->instance_count) + continue; + + info->start = indirect_data[offset + 2]; + info->index_bias = indirect_data[offset + 3]; + info->start_instance = indirect_data[offset + 4]; + + u_vbuf_draw_vbo(mgr, info); + } +} + void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) { struct pipe_context *pipe = mgr->pipe; 
@@ -1160,33 +1185,163 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) new_info = *info; - /* Fallback. We need to know all the parameters. */ + /* Handle indirect (multi)draws. */ if (new_info.indirect) { - struct pipe_transfer *transfer = NULL; - int *data; - - if (new_info.index_size) { - data = pipe_buffer_map_range(pipe, new_info.indirect->buffer, - new_info.indirect->offset, 20, - PIPE_TRANSFER_READ, &transfer); - new_info.index_bias = data[3]; - new_info.start_instance = data[4]; - } - else { - data = pipe_buffer_map_range(pipe, new_info.indirect->buffer, - new_info.indirect->offset, 16, - PIPE_TRANSFER_READ, &transfer); - new_info.start_instance = data[3]; + const struct pipe_draw_indirect_info *indirect = new_info.indirect; + unsigned draw_count = 0; + + /* Get the number of draws. */ + if (indirect->indirect_draw_count) { + pipe_buffer_read(pipe, indirect->indirect_draw_count, + indirect->indirect_draw_count_offset, + 4, &draw_count); + } else { + draw_count = indirect->draw_count; } - new_info.count = data[0]; - new_info.instance_count = data[1]; - new_info.start = data[2]; - pipe_buffer_unmap(pipe, transfer); - new_info.indirect = NULL; - - if (!new_info.count) + if (!draw_count) return; + + unsigned data_size = (draw_count - 1) * indirect->stride + + (new_info.index_size ? 20 : 16); + unsigned *data = malloc(data_size); + if (!data) + return; /* report an error? */ + + /* Read the used buffer range only once, because the read can be + * uncached. + */ + pipe_buffer_read(pipe, indirect->buffer, indirect->offset, data_size, + data); + + if (info->index_size) { + /* Indexed multidraw. */ + unsigned index_bias0 = data[3]; + bool index_bias_same = true; + + /* If we invoke the translate path, we have to split the multidraw. 
*/ + if (incompatible_vb_mask || + mgr->ve->incompatible_elem_mask) { + u_vbuf_split_indexed_multidraw(mgr, &new_info, data, + indirect->stride, draw_count); + free(data); + return; + } + + /* See if index_bias is the same for all draws. */ + for (unsigned i = 1; i < draw_count; i++) { + if (data[i * indirect->stride / 4 + 3] != index_bias0) { + index_bias_same = false; + break; + } + } + + /* Split the multidraw if index_bias is different. */ + if (!index_bias_same) { + u_vbuf_split_indexed_multidraw(mgr, &new_info, data, + indirect->stride, draw_count); + free(data); + return; + } + + /* If we don't need to use the translate path and index_bias is + * the same, we can process the multidraw with the time complexity + * equal to 1 draw call (except for the index range computation). + * We only need to compute the index range covering all draw calls + * of the multidraw. + * + * The driver will not look at these values because indirect != NULL. + * These values determine the user buffer bounds to upload. + */ + new_info.index_bias = index_bias0; + new_info.min_index = ~0u; + new_info.max_index = 0; + new_info.start_instance = ~0u; + unsigned end_instance = 0; + + struct pipe_transfer *transfer = NULL; + const uint8_t *indices; + + if (info->has_user_indices) { + indices = (uint8_t*)info->index.user; + } else { + indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource, + PIPE_TRANSFER_READ, &transfer); + } + + for (unsigned i = 0; i < draw_count; i++) { + unsigned offset = i * indirect->stride / 4; + unsigned start = data[offset + 2]; + unsigned count = data[offset + 0]; + unsigned start_instance = data[offset + 4]; + unsigned instance_count = data[offset + 1]; + + if (!count || !instance_count) + continue; + + /* Update the ranges of instances. */ + new_info.start_instance = MIN2(new_info.start_instance, + start_instance); + end_instance = MAX2(end_instance, start_instance + instance_count); + + /* Update the index range. 
*/ + unsigned min, max; + new_info.count = count; /* only used by get_minmax_index */ + u_vbuf_get_minmax_index_mapped(&new_info, + indices + + new_info.index_size * start, + &min, &max); + + new_info.min_index = MIN2(new_info.min_index, min); + new_info.max_index = MAX2(new_info.max_index, max); + } + free(data); + + if (transfer) + pipe_buffer_unmap(pipe, transfer); + + /* Set the final instance count. */ + new_info.instance_count = end_instance - new_info.start_instance; + + if (new_info.start_instance == ~0u || !new_info.instance_count) + return; + } else { + /* Non-indexed multidraw. + * + * Keep the draw call indirect and compute minimums & maximums, + * which will determine the user buffer bounds to upload, but + * the driver will not look at these values because indirect != NULL. + * + * This efficiently processes the multidraw with the time complexity + * equal to 1 draw call. + */ + new_info.start = ~0u; + new_info.start_instance = ~0u; + unsigned end_vertex = 0; + unsigned end_instance = 0; + + for (unsigned i = 0; i < draw_count; i++) { + unsigned offset = i * indirect->stride / 4; + unsigned start = data[offset + 2]; + unsigned count = data[offset + 0]; + unsigned start_instance = data[offset + 3]; + unsigned instance_count = data[offset + 1]; + + new_info.start = MIN2(new_info.start, start); + new_info.start_instance = MIN2(new_info.start_instance, + start_instance); + + end_vertex = MAX2(end_vertex, start + count); + end_instance = MAX2(end_instance, start_instance + instance_count); + } + + /* Set the final counts. */ + new_info.count = end_vertex - new_info.start; + new_info.instance_count = end_instance - new_info.start_instance; + + if (new_info.start == ~0u || !new_info.count || !new_info.instance_count) + return; + } } if (new_info.index_size) { @@ -1211,7 +1366,8 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) * We would have to break this drawing operation into several ones. 
*/ /* Use some heuristic to see if unrolling indices improves * performance. */ - if (!new_info.primitive_restart && + if (!info->indirect && + !new_info.primitive_restart && num_vertices > new_info.count*2 && num_vertices - new_info.count > 32 && !u_vbuf_mapping_vertex_buffer_blocks(mgr)) { From c18ed873a5fb470f38e535a4e876dde46d256213 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Sat, 23 Jun 2018 19:01:34 +0200 Subject: [PATCH 006/367] nvc0/ir: return 0 in imageLoad on incomplete textures We already guarded all OP_SULDP against out of bound accesses, but we ended up just reusing whatever value was stored in the dest registers. Fixes CTS test shader_image_load_store.incomplete_textures v2: fix for loads not ending up with predicates (bindless_texture) v3: fix replacing the def Cc: Reviewed-by: Ilia Mirkin Signed-off-by: Karol Herbst (cherry picked from commit c3325097be93d6374a6b5f9fb5eee0878698ef77) --- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 33 +++++++++++++++++-- .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 + 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 1410cf26c87..743f5bd552b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -2151,13 +2151,36 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su) } } +void +NVC0LoweringPass::insertOOBSurfaceOpResult(TexInstruction *su) +{ + if (!su->getPredicate()) + return; + + bld.setPosition(su, true); + + for (unsigned i = 0; su->defExists(i); ++i) { + ValueDef &def = su->def(i); + + Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0)); + assert(su->cc == CC_NOT_P); + mov->setPredicate(CC_P, su->getPredicate()); + Instruction *uni = bld.mkOp2(OP_UNION, TYPE_U32, bld.getSSA(), NULL, mov->getDef(0)); + + def.replace(uni->getDef(0), false); + 
uni->setSrc(0, def.get()); + } +} + void NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su) { processSurfaceCoordsNVE4(su); - if (su->op == OP_SULDP) + if (su->op == OP_SULDP) { convertSurfaceFormat(su); + insertOOBSurfaceOpResult(su); + } if (su->op == OP_SUREDB || su->op == OP_SUREDP) { assert(su->getPredicate()); @@ -2267,8 +2290,10 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su) processSurfaceCoordsNVC0(su); - if (su->op == OP_SULDP) + if (su->op == OP_SULDP) { convertSurfaceFormat(su); + insertOOBSurfaceOpResult(su); + } if (su->op == OP_SUREDB || su->op == OP_SUREDP) { const int dim = su->tex.target.getDim(); @@ -2370,8 +2395,10 @@ NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su) { processSurfaceCoordsGM107(su); - if (su->op == OP_SULDP) + if (su->op == OP_SULDP) { convertSurfaceFormat(su); + insertOOBSurfaceOpResult(su); + } if (su->op == OP_SUREDP) { Value *def = su->getDef(0); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 8724c09afd9..5dbb3e4f009 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -172,6 +172,7 @@ class NVC0LoweringPass : public Pass void processSurfaceCoordsNVE4(TexInstruction *); void processSurfaceCoordsNVC0(TexInstruction *); void convertSurfaceFormat(TexInstruction *); + void insertOOBSurfaceOpResult(TexInstruction *); Value *calculateSampleOffset(Value *sampleID); protected: From b6e9ef15561f7b18ca7c8b56159c7038cdd34866 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 31 Jul 2018 07:12:56 +0100 Subject: [PATCH 007/367] intel: aubinator: fix read the context/ring Up to now we've been lucky that the buffer returned was always exactly at the address we requested. 
Fixes: 144b40db5411 ("intel: aubinator: drop the 1Tb GTT mapping") Signed-off-by: Lionel Landwerlin Reviewed-by: Rafael Antognolli (cherry picked from commit 35955afa7aa49906fad772b44d3e6357203430ae) --- src/intel/tools/aubinator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c index 8989d558b66..3fec04c24c4 100644 --- a/src/intel/tools/aubinator.c +++ b/src/intel/tools/aubinator.c @@ -590,7 +590,7 @@ handle_memtrace_reg_write(uint32_t *p) uint32_t pphwsp_addr = context_descriptor & 0xfffff000; struct gen_batch_decode_bo pphwsp_bo = get_ggtt_batch_bo(NULL, pphwsp_addr); uint32_t *context = (uint32_t *)((uint8_t *)pphwsp_bo.map + - (pphwsp_bo.addr - pphwsp_addr) + + (pphwsp_addr - pphwsp_bo.addr) + pphwsp_size); uint32_t ring_buffer_head = context[5]; @@ -601,7 +601,7 @@ handle_memtrace_reg_write(uint32_t *p) struct gen_batch_decode_bo ring_bo = get_ggtt_batch_bo(NULL, ring_buffer_start); assert(ring_bo.size > 0); - void *commands = (uint8_t *)ring_bo.map + (ring_bo.addr - ring_buffer_start); + void *commands = (uint8_t *)ring_bo.map + (ring_buffer_start - ring_bo.addr); if (context_descriptor & 0x100 /* ppgtt */) { batch_ctx.get_bo = get_ppgtt_batch_bo; From cce78368dfbd83909b255b7556469103040ceb96 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 1 Aug 2018 16:37:08 -0700 Subject: [PATCH 008/367] v3d: Make sure we don't emit a thrsw before the last one finished. Found while forcing some spilling, which creates a lot of short tmua->thrsw->ldtmu sequences. 
Cc: "18.2" (cherry picked from commit ccbe33af5b086f4b488ac7ca8a8a45ebc9ac189c) --- src/broadcom/compiler/qpu_schedule.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index fb5ecd6410c..4f3b621fd29 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -462,6 +462,7 @@ struct choose_scoreboard { int last_magic_sfu_write_tick; int last_ldvary_tick; int last_uniforms_reset_tick; + int last_thrsw_tick; bool tlb_locked; }; @@ -1095,10 +1096,16 @@ qpu_instruction_valid_in_thrend_slot(struct v3d_compile *c, } static bool -valid_thrsw_sequence(struct v3d_compile *c, +valid_thrsw_sequence(struct v3d_compile *c, struct choose_scoreboard *scoreboard, struct qinst *qinst, int instructions_in_sequence, bool is_thrend) { + /* No emitting our thrsw while the previous thrsw hasn't happened yet. */ + if (scoreboard->last_thrsw_tick + 3 > + scoreboard->tick - instructions_in_sequence) { + return false; + } + for (int slot = 0; slot < instructions_in_sequence; slot++) { /* No scheduling SFU when the result would land in the other * thread. 
The simulator complains for safety, though it @@ -1159,7 +1166,8 @@ emit_thrsw(struct v3d_compile *c, if (!v3d_qpu_sig_pack(c->devinfo, &sig, &packed_sig)) break; - if (!valid_thrsw_sequence(c, prev_inst, slots_filled + 1, + if (!valid_thrsw_sequence(c, scoreboard, + prev_inst, slots_filled + 1, is_thrend)) { break; } @@ -1173,7 +1181,9 @@ emit_thrsw(struct v3d_compile *c, if (merge_inst) { merge_inst->qpu.sig.thrsw = true; needs_free = true; + scoreboard->last_thrsw_tick = scoreboard->tick - slots_filled; } else { + scoreboard->last_thrsw_tick = scoreboard->tick; insert_scheduled_instruction(c, block, scoreboard, inst); time++; slots_filled++; @@ -1475,6 +1485,7 @@ v3d_qpu_schedule_instructions(struct v3d_compile *c) scoreboard.last_ldvary_tick = -10; scoreboard.last_magic_sfu_write_tick = -10; scoreboard.last_uniforms_reset_tick = -10; + scoreboard.last_thrsw_tick = -10; if (debug) { fprintf(stderr, "Pre-schedule instructions\n"); From c3b1a6d7faa3846c0fcfb9e61ee370145bfaf5fc Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 1 Aug 2018 16:56:38 -0700 Subject: [PATCH 009/367] v3d: Wait for TMU writes to complete before continuing after a spill. The simulator complained that we had write responses outstanding at shader end. It seems that a TMU read does not guarantee that previous TMU writes by the thread have completed, which surprised me. 
Cc: "18.2" (cherry picked from commit 3f9cb2eb05152f4f0269e97893a16f23261f095b) --- src/broadcom/compiler/vir_register_allocate.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c index 5a856acd7ed..d0a9f4e30d9 100644 --- a/src/broadcom/compiler/vir_register_allocate.c +++ b/src/broadcom/compiler/vir_register_allocate.c @@ -102,7 +102,7 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g, started_last_seg = true; /* Track when we're in between a TMU setup and the - * final LDTMU from that TMU setup. We can't + * final LDTMU or TMUWT from that TMU setup. We can't * spill/fill any temps during that time, because that * involves inserting a new TMU setup/LDTMU sequence. */ @@ -110,6 +110,10 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g, is_last_ldtmu(inst, block)) in_tmu_operation = false; + if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU && + inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) + in_tmu_operation = false; + if (v3d_qpu_writes_tmu(&inst->qpu)) in_tmu_operation = true; } @@ -206,6 +210,7 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp) inst->dst); v3d_emit_spill_tmua(c, spill_offset); vir_emit_thrsw(c); + vir_TMUWT(c); c->spills++; } From c8d41bc58d5244f3494434960e6410eafb5c14fc Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 1 Aug 2018 17:47:13 -0700 Subject: [PATCH 010/367] v3d: Make sure that QPU instruction-has-a-dest matches VIR. Found when debugging register spilling -- we would try to spill the dest of a STVPMV, inserting spill code after entering the last segment. In fact, we were likely to choose to do this, given that the STVPMV "dest" temp was never read from, making it cheap to spill.
Cc: "18.2" (cherry picked from commit f2c0d310d6efe560de8192ab468ba02d50c9ac1e) --- src/broadcom/compiler/v3d_compiler.h | 2 +- src/broadcom/compiler/vir.c | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 33a9942734d..70edeed2730 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -928,7 +928,7 @@ VIR_A_ALU2(OR) VIR_A_ALU2(XOR) VIR_A_ALU2(VADD) VIR_A_ALU2(VSUB) -VIR_A_ALU2(STVPMV) +VIR_A_NODST_2(STVPMV) VIR_A_ALU1(NOT) VIR_A_ALU1(NEG) VIR_A_ALU1(FLAPUSH) diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 86379faa5bb..fc0b34d4453 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -452,6 +452,16 @@ vir_emit_def(struct v3d_compile *c, struct qinst *inst) { assert(inst->dst.file == QFILE_NULL); + /* If we're emitting an instruction that's a def, it had better be + * writing a register. + */ + if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) { + assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP || + v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op)); + assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP || + v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op)); + } + inst->dst = vir_get_temp(c); if (inst->dst.file == QFILE_TEMP) From 71aa72d69542aefe37b2739cf33005c5fbf58015 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 2 Aug 2018 11:12:37 -0700 Subject: [PATCH 011/367] v3d: Avoid spilling that breaks the r5 usage after a ldvary. Fixes bad rendering when forcing 2 spills in glxgears. 
Cc: "18.2" (cherry picked from commit 50a8713d4f90a6c70a23f9f5871420371df283a7) --- src/broadcom/compiler/vir_register_allocate.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c index d0a9f4e30d9..61d273575d9 100644 --- a/src/broadcom/compiler/vir_register_allocate.c +++ b/src/broadcom/compiler/vir_register_allocate.c @@ -94,6 +94,15 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g, } } + /* Refuse to spill a ldvary's dst, because that means + * that ldvary's r5 would end up being used across a + * thrsw. + */ + if (inst->qpu.sig.ldvary) { + assert(inst->dst.file == QFILE_TEMP); + BITSET_CLEAR(c->spillable, inst->dst.index); + } + if (inst->is_last_thrsw) started_last_seg = true; From 37fa81f631b10ebfa9b7158b102236ae2a28d054 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 2 Aug 2018 12:23:02 -0700 Subject: [PATCH 012/367] v3d: Emit the VCM_CACHE_SIZE packet. This is needed to ensure that we don't get blocked waiting for VPM space with bin/render overlapping. 
Cc: "18.2" (cherry picked from commit 1561e4984eb03d6946d19b820b83a96bbbd83b98) --- src/broadcom/cle/v3d_packet_v33.xml | 10 ++++++++++ src/broadcom/common/v3d_device_info.h | 7 ++++--- src/broadcom/compiler/v3d_compiler.h | 3 +++ src/broadcom/compiler/vir.c | 20 +++++++++++++++++++- src/gallium/drivers/v3d/v3d_screen.c | 2 ++ src/gallium/drivers/v3d/v3dx_draw.c | 7 +++++++ 6 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/broadcom/cle/v3d_packet_v33.xml b/src/broadcom/cle/v3d_packet_v33.xml index 6ce8299e26b..f471d542c56 100644 --- a/src/broadcom/cle/v3d_packet_v33.xml +++ b/src/broadcom/cle/v3d_packet_v33.xml @@ -528,6 +528,16 @@ + + + + + + + + + + diff --git a/src/broadcom/common/v3d_device_info.h b/src/broadcom/common/v3d_device_info.h index 5685c7a2161..b0a2a02154c 100644 --- a/src/broadcom/common/v3d_device_info.h +++ b/src/broadcom/common/v3d_device_info.h @@ -27,13 +27,14 @@ #include /** - * Struct for tracking features of the V3D chip. This is where we'll store - * boolean flags for features in a specific version, but for now it's just the - * version + * Struct for tracking features of the V3D chip across driver and compiler. */ struct v3d_device_info { /** Simple V3D version: major * 10 + minor */ uint8_t ver; + + /** Size of the VPM, in bytes. */ + int vpm_size; }; #endif diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 70edeed2730..070e6a3aa59 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -648,6 +648,9 @@ struct v3d_vs_prog_data { /* Total number of components written, for the shader state record. */ uint32_t vpm_output_size; + + /* Value to be programmed in VCM_CACHE_SIZE. 
*/ + uint8_t vcm_cache_size; }; struct v3d_fs_prog_data { diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index fc0b34d4453..1c8223165c6 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -756,10 +756,28 @@ uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler, if (prog_data->uses_iid) prog_data->vpm_input_size++; - /* Input/output segment size are in 8x32-bit multiples. */ + /* Input/output segment size are in sectors (8 rows of 32 bits per + * channel). + */ prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8; prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8; + /* Compute VCM cache size. We set up our program to take up less than + * half of the VPM, so that any set of bin and render programs won't + * run out of space. We need space for at least one input segment, + * and then allocate the rest to output segments (one for the current + * program, the rest to VCM). The valid range of the VCM cache size + * field is 1-4 16-vertex batches, but GFXH-1744 limits us to 2-4 + * batches. 
+ */ + assert(c->devinfo->vpm_size); + int sector_size = 16 * sizeof(uint32_t) * 8; + int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size; + int half_vpm = vpm_size_in_sectors / 2; + int vpm_output_batches = half_vpm - prog_data->vpm_input_size; + assert(vpm_output_batches >= 2); + prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4); + return v3d_return_qpu_insts(c, final_assembly_size); } diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index 2e743851bea..cae6cc3f6e7 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -585,6 +585,8 @@ v3d_get_device_info(struct v3d_screen *screen) uint32_t minor = (ident1.value >> 0) & 0xf; screen->devinfo.ver = major * 10 + minor; + screen->devinfo.vpm_size = (ident1.value >> 28 & 0xf) * 1024; + switch (screen->devinfo.ver) { case 33: case 41: diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c index 479adb70fdb..bfb4af13ceb 100644 --- a/src/gallium/drivers/v3d/v3dx_draw.c +++ b/src/gallium/drivers/v3d/v3dx_draw.c @@ -306,6 +306,13 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, } } + cl_emit(&job->bcl, VCM_CACHE_SIZE, vcm) { + vcm.number_of_16_vertex_batches_for_binning = + v3d->prog.cs->prog_data.vs->vcm_cache_size; + vcm.number_of_16_vertex_batches_for_rendering = + v3d->prog.vs->prog_data.vs->vcm_cache_size; + } + cl_emit(&job->bcl, GL_SHADER_STATE, state) { state.address = cl_address(job->indirect.bo, shader_rec_offset); state.number_of_attribute_arrays = num_elements_to_emit; From 3c3589a0ba899d175cdea1e059afad86259ace74 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Fri, 3 Aug 2018 11:47:28 +0200 Subject: [PATCH 013/367] meson, install_megadrivers: Also remove stale symlinks os.path.exists doesn't return True for stale symlinks, but they are in the way later, when a link/file with the same name is to be created. 
For instance it is conceivable that the pointed to file is replaced by a file with a new name, and then the symlink is dead. To handle this check specifically for all existing symlinks to be removed. (This bugged me for some time with a link libXvMCr600.so always being in the way of installing this file) v2: use only os.lexist and replace all instances of os.exist (Dylan Baker) v3: handle directory check correctly (Eric Engestrom) Fixes: f7f1b30f81e842db6057591470ce3cb6d4fb2795 ("meson: extend install_megadrivers script to handle symmlinking") Reviewed-by: Eric Engestrom (v2 minus dir check) Reviewed-by: Dylan Baker Signed-off-by: Gert Wollny (cherry picked from commit 7a46b2d6418aa1adedf5621ad1a2a43676785851) --- bin/install_megadrivers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/install_megadrivers.py b/bin/install_megadrivers.py index 8d9ed9c6dce..551e385d1a5 100755 --- a/bin/install_megadrivers.py +++ b/bin/install_megadrivers.py @@ -43,13 +43,15 @@ def main(): master = os.path.join(to, os.path.basename(args.megadriver)) if not os.path.exists(to): + if os.path.lexists(to): + os.unlink(to) os.makedirs(to) shutil.copy(args.megadriver, master) for driver in args.drivers: abs_driver = os.path.join(to, driver) - if os.path.exists(abs_driver): + if os.path.lexists(abs_driver): os.unlink(abs_driver) print('installing {} to {}'.format(args.megadriver, abs_driver)) os.link(master, abs_driver) @@ -60,7 +62,7 @@ def main(): name, ext = os.path.splitext(driver) while ext != '.so': - if os.path.exists(name): + if os.path.lexists(name): os.unlink(name) os.symlink(driver, name) name, ext = os.path.splitext(name) From fdbbe4c50c5e637ef38ced5ef299446bc54ecdd7 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 18 Jul 2018 17:43:35 -0700 Subject: [PATCH 014/367] drisw: Fix build on Android Nougat, which lacks shm (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit cf54bd5e8, 
dri_sw_winsys.c began using <sys/shm.h> to support the new functions putImageShm, getImageShm in DRI_SWRastLoader. But Android began supporting System V shared memory only in Oreo. Nougat has no shm headers. Fix the build by ifdef'ing out the shm code on Nougat. Fixes: cf54bd5e8 "drisw: use shared memory when possible" Reviewed-by: Dave Airlie Cc: Marc-André Lureau (cherry picked from commit aaa41cd297f91583f0e5bddb11b736c0d03e333d) --- src/gallium/winsys/sw/dri/dri_sw_winsys.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/gallium/winsys/sw/dri/dri_sw_winsys.c b/src/gallium/winsys/sw/dri/dri_sw_winsys.c index 40007200a5d..d519bcfedd3 100644 --- a/src/gallium/winsys/sw/dri/dri_sw_winsys.c +++ b/src/gallium/winsys/sw/dri/dri_sw_winsys.c @@ -26,8 +26,12 @@ * **************************************************************************/ +#if !defined(ANDROID) || ANDROID_API_LEVEL >= 26 +/* Android's libc began supporting shm in Oreo */ +#define HAVE_SHM #include <sys/ipc.h> #include <sys/shm.h> +#endif #include "pipe/p_compiler.h" #include "pipe/p_format.h" @@ -83,6 +87,7 @@ dri_sw_is_displaytarget_format_supported( struct sw_winsys *ws, return TRUE; } +#ifdef HAVE_SHM static char * alloc_shm(struct dri_sw_displaytarget *dri_sw_dt, unsigned size) { @@ -101,6 +106,7 @@ alloc_shm(struct dri_sw_displaytarget *dri_sw_dt, unsigned size) return addr; } +#endif static struct sw_displaytarget * dri_sw_displaytarget_create(struct sw_winsys *winsys, @@ -131,8 +137,11 @@ dri_sw_displaytarget_create(struct sw_winsys *winsys, size = dri_sw_dt->stride * nblocksy; dri_sw_dt->shmid = -1; + +#ifdef HAVE_SHM if (ws->lf->put_image_shm) dri_sw_dt->data = alloc_shm(dri_sw_dt, size); +#endif if(!dri_sw_dt->data) dri_sw_dt->data = align_malloc(size, alignment); @@ -156,8 +165,10 @@ dri_sw_displaytarget_destroy(struct sw_winsys *ws, struct dri_sw_displaytarget *dri_sw_dt = dri_sw_displaytarget(dt); if (dri_sw_dt->shmid >= 0) { +#ifdef HAVE_SHM shmdt(dri_sw_dt->data); shmctl(dri_sw_dt->shmid, IPC_RMID, 0);
+#endif } else { align_free(dri_sw_dt->data); } From ed117c27e1123630952ae4a9fbe3d81ea45165fe Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 6 Aug 2018 15:28:56 -0700 Subject: [PATCH 015/367] vc4: Fix context creation when syncobjs aren't supported. Noticed when trying to run current Mesa on rpi's downstream kernel. Fixes: b0acc3a5628c ("broadcom/vc4: Native fence fd support") (cherry picked from commit 86095e9bb1335b082554ed2ceaaa66470b24cb28) --- src/gallium/drivers/vc4/vc4_fence.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_fence.c b/src/gallium/drivers/vc4/vc4_fence.c index 7071425595c..fac9df34d4f 100644 --- a/src/gallium/drivers/vc4/vc4_fence.c +++ b/src/gallium/drivers/vc4/vc4_fence.c @@ -142,8 +142,12 @@ vc4_fence_context_init(struct vc4_context *vc4) /* Since we initialize the in_fence_fd to -1 (no wait necessary), * we also need to initialize our in_syncobj as signaled. */ - return drmSyncobjCreate(vc4->fd, DRM_SYNCOBJ_CREATE_SIGNALED, - &vc4->in_syncobj); + if (vc4->screen->has_syncobj) { + return drmSyncobjCreate(vc4->fd, DRM_SYNCOBJ_CREATE_SIGNALED, + &vc4->in_syncobj); + } else { + return 0; + } } void From d39fb6d1571597964c31adf380a1e772838936c7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 6 Aug 2018 18:53:57 -0700 Subject: [PATCH 016/367] vc4: Fix a leak of the no-vertex-elements workaround BO. 
Fixes: bd1925562ad1 ("vc4: Convert the driver to emitting the shader record using pack macros.") (cherry picked from commit 9507e036994018d3038e6263b98c53b0c916d2cd) --- src/gallium/drivers/vc4/vc4_draw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 900c0abaf20..06785516cae 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -222,6 +222,8 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, attr.coordinate_shader_vpm_offset = 0; attr.vertex_shader_vpm_offset = 0; } + + vc4_bo_unreference(&bo); } cl_emit(&job->bcl, GL_SHADER_STATE, shader_state) { From 4a769c88505b025c31e58daced09301b21902c40 Mon Sep 17 00:00:00 2001 From: Jon Turney Date: Thu, 2 Aug 2018 14:50:27 +0100 Subject: [PATCH 017/367] meson: use correct keyword to fix a meson warning With a sufficiently recent meson, the following warning is produced: WARNING: Passed invalid keyword argument "extra_args". WARNING: This will become a hard error in the future. It seems that compiler.links(args:) is meant here.
Signed-off-by: Jon Turney Reviewed-and-Tested-by: Eric Engestrom Reviewed-by: Dylan Baker (cherry picked from commit a48c0659e12bfb2d715cceca75eff24ae6024bba) --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 86a4a4ce6da..cbf88b50139 100644 --- a/meson.build +++ b/meson.build @@ -989,7 +989,7 @@ if cc.links(''' freelocale(loc); return 0; }''', - extra_args : pre_args, + args : pre_args, name : 'strtod has locale support') pre_args += '-DHAVE_STRTOD_L' endif From 4a25d8b623ad2c5fee31f4649f76560aa115ed1b Mon Sep 17 00:00:00 2001 From: Andres Gomez Date: Thu, 9 Aug 2018 02:29:47 +0300 Subject: [PATCH 018/367] Update version to 18.2.0-rc2 Signed-off-by: Andres Gomez --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 39b71f2a640..b9266040774 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -18.2.0-rc1 +18.2.0-rc2 From adfbf1fe84e2777b7ad55a60625e478469a06d05 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 7 Aug 2018 12:59:14 -0700 Subject: [PATCH 019/367] vc4: Respect a sampler view's first_layer field. 
Fixes texturing from EGL images created from cubemap faces, as in dEQP-EGL.functional.image.create.gles2_cubemap_negative_x_rgba_texture Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit 9ab6912a00ec29f5d1d9cebc7d3e32ae235419e8) --- src/gallium/drivers/vc4/vc4_state.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index 408a9e0af2a..1e4657a7922 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -614,7 +614,9 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, } so->texture_p0 = - (VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) | + (VC4_SET_FIELD((rsc->slices[0].offset + + cso->u.tex.first_layer * + rsc->cube_map_stride) >> 12, VC4_TEX_P0_OFFSET) | VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE) | VC4_SET_FIELD(so->force_first_level ? cso->u.tex.last_level : From a42afc8504ee1891780b2cc615ca1183357d5948 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 7 Aug 2018 13:47:08 -0700 Subject: [PATCH 020/367] vc4: Ignore samplers for finding uniform offsets. 
Fixes: dEQP-GLES2.shaders.struct.uniform.sampler_array_fragment dEQP-GLES2.shaders.struct.uniform.sampler_array_vertex dEQP-GLES2.shaders.struct.uniform.sampler_nested_fragment dEQP-GLES2.shaders.struct.uniform.sampler_nested_vertex Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit 69158c452bb39cd3d12110dd623aff09e771fa77) --- src/gallium/drivers/vc4/vc4_program.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 13c3b7678b2..1d767af1bdb 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -38,6 +38,7 @@ #include "vc4_context.h" #include "vc4_qpu.h" #include "vc4_qir.h" +#include "mesa/state_tracker/st_glsl_types.h" static struct qreg ntq_get_src(struct vc4_compile *c, nir_src src, int i); @@ -50,6 +51,12 @@ type_size(const struct glsl_type *type) return glsl_count_attribute_slots(type, false); } +static int +uniforms_type_size(const struct glsl_type *type) +{ + return st_glsl_storage_type_size(type, false); +} + static void resize_qreg_array(struct vc4_compile *c, struct qreg **regs, @@ -1685,7 +1692,7 @@ static void ntq_setup_uniforms(struct vc4_compile *c) { nir_foreach_variable(var, &c->s->uniforms) { - uint32_t vec4_count = type_size(var->type); + uint32_t vec4_count = uniforms_type_size(var->type); unsigned vec4_size = 4 * sizeof(float); declare_uniform_range(c, var->data.driver_location * vec4_size, @@ -2469,9 +2476,13 @@ vc4_shader_state_create(struct pipe_context *pctx, */ s = cso->ir.nir; - NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size, + NIR_PASS_V(s, nir_lower_io, nir_var_all & ~nir_var_uniform, + type_size, (nir_lower_io_options)0); - } else { + NIR_PASS_V(s, nir_lower_io, nir_var_uniform, + uniforms_type_size, + (nir_lower_io_options)0); + } else { assert(cso->type == PIPE_SHADER_IR_TGSI); if (vc4_debug & VC4_DEBUG_TGSI) { From f0ae95492ad4def099acd16c712192d61f597fea 
Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 23 Feb 2018 19:32:00 +0000 Subject: [PATCH 021/367] automake: require shared glapi when using DRI based libGL This has been a requirement for ages, yet it seems like we never explicitly errored out during configure. CC: Signed-off-by: Emil Velikov Reviewed-by: Adam Jackson (cherry picked from commit a7ea7511ba76c0a83eec84dfc9c14c82b5c82dc4) --- configure.ac | 2 ++ src/glx/Makefile.am | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index ffb8424a07b..8937d238231 100644 --- a/configure.ac +++ b/configure.ac @@ -1658,6 +1658,8 @@ xxlib | xgallium-xlib) xdri) # DRI-based GLX + require_dri_shared_libs_and_glapi "GLX" + # find the DRI deps for libGL dri_modules="x11 xext xdamage >= $XDAMAGE_REQUIRED xfixes x11-xcb xcb xcb-glx >= $XCBGLX_REQUIRED" diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am index 5233257fb40..8f9d80c9f41 100644 --- a/src/glx/Makefile.am +++ b/src/glx/Makefile.am @@ -19,9 +19,6 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. -if HAVE_SHARED_GLAPI -SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la -endif SUBDIRS = @@ -181,7 +178,7 @@ GL_LIBS = \ $(LIBDRM_LIBS) \ libglx.la \ $(top_builddir)/src/mapi/glapi/libglapi.la \ - $(SHARED_GLAPI_LIB) \ + $(top_builddir)/src/mapi/shared-glapi/libglapi.la \ $(GL_LIB_DEPS) GL_LDFLAGS = \ From 33ac5fb67829b494a78806fc07b8824cbd29bb9e Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 23 Feb 2018 19:32:04 +0000 Subject: [PATCH 022/367] autotools: error out when using the broken --with-{gl, osmesa}-lib-name The toggles were broken with the introduction of --enable-mangling. Fixing that up might be possible, but it's not worth the complexity since one can rename the libraries at any point. 
CC: Signed-off-by: Emil Velikov Reviewed-by: Adam Jackson (cherry picked from commit d5ac23647110fd530f9bf5002762587be446866d) --- configure.ac | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 8937d238231..8f5911ab698 100644 --- a/configure.ac +++ b/configure.ac @@ -1503,15 +1503,15 @@ fi AC_ARG_WITH([gl-lib-name], [AS_HELP_STRING([--with-gl-lib-name@<:@=NAME@:>@], [specify GL library name @<:@default=GL@:>@])], - [GL_LIB=$withval], - [GL_LIB="$DEFAULT_GL_LIB_NAME"]) + [AC_MSG_ERROR([--with-gl-lib-name is no longer supported. Rename the library manually if needed.])], + []) AC_ARG_WITH([osmesa-lib-name], [AS_HELP_STRING([--with-osmesa-lib-name@<:@=NAME@:>@], [specify OSMesa library name @<:@default=OSMesa@:>@])], - [OSMESA_LIB=$withval], - [OSMESA_LIB=OSMesa]) -AS_IF([test "x$GL_LIB" = xyes], [GL_LIB="$DEFAULT_GL_LIB_NAME"]) -AS_IF([test "x$OSMESA_LIB" = xyes], [OSMESA_LIB=OSMesa]) + [AC_MSG_ERROR([--with-osmesa-lib-name is no longer supported. Rename the library manually if needed.])], + []) +GL_LIB="$DEFAULT_GL_LIB_NAME" +OSMESA_LIB=OSMesa dnl dnl Mangled Mesa support From c70920697759f3c5257806168be9009364025cf4 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 23 Feb 2018 19:32:05 +0000 Subject: [PATCH 023/367] autotools: error out when building with mangling and glvnd It's not a thing that can work, nor is a wise idea to attempt. 
v2: Tweak error message (Dylan) CC: Signed-off-by: Emil Velikov Reviewed-by: Adam Jackson (v1) (cherry picked from commit 25a9450a44b3b572fba81e6cafe33f3367252499) --- configure.ac | 3 +++ 1 file changed, 3 insertions(+) diff --git a/configure.ac b/configure.ac index 8f5911ab698..ff4828c5b56 100644 --- a/configure.ac +++ b/configure.ac @@ -1523,6 +1523,9 @@ AC_ARG_ENABLE([mangling], [enable_mangling=no] ) if test "x${enable_mangling}" = "xyes" ; then + if test "x$enable_libglvnd" = xyes; then + AC_MSG_ERROR([Conflicting options --enable-mangling and --enable-libglvnd.]) + fi DEFINES="${DEFINES} -DUSE_MGL_NAMESPACE" GL_LIB="Mangled${GL_LIB}" OSMESA_LIB="Mangled${OSMESA_LIB}" From 6ae0a639ec3a000bc4106152e288f419386d75cb Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 23 Feb 2018 19:32:08 +0000 Subject: [PATCH 024/367] autotools: use correct gl.pc LIBS when using glvnd This is more of a hack, since glvnd itself should be providing the file. Until that happens, ensure the libs is correctly set to -lGL CC: Signed-off-by: Emil Velikov Reviewed-by: Adam Jackson (cherry picked from commit 315c46cfdc3dbd4d51b74ab26df693725e947724) --- configure.ac | 9 +++++++++ src/mesa/gl.pc.in | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index ff4828c5b56..864dcae8e34 100644 --- a/configure.ac +++ b/configure.ac @@ -1533,6 +1533,15 @@ fi AC_SUBST([GL_LIB]) AC_SUBST([OSMESA_LIB]) +dnl HACK when building glx + glvnd we ship gl.pc, despite that glvnd should do it +dnl Thus we need to use GL as a DSO name. 
+if test "x$enable_libglvnd" = xyes -a "x$enable_glx" != xno; then + GL_PKGCONF_LIB="GL" +else + GL_PKGCONF_LIB="$GL_LIB" +fi +AC_SUBST([GL_PKGCONF_LIB]) + # Check for libdrm PKG_CHECK_MODULES([LIBDRM], [libdrm >= $LIBDRM_REQUIRED], [have_libdrm=yes], [have_libdrm=no]) diff --git a/src/mesa/gl.pc.in b/src/mesa/gl.pc.in index 181724b97bf..680f7427768 100644 --- a/src/mesa/gl.pc.in +++ b/src/mesa/gl.pc.in @@ -7,7 +7,7 @@ Name: gl Description: Mesa OpenGL library Requires.private: @GL_PC_REQ_PRIV@ Version: @PACKAGE_VERSION@ -Libs: -L${libdir} -l@GL_LIB@ +Libs: -L${libdir} -l@GL_PKGCONF_LIB@ Libs.private: @GL_PC_LIB_PRIV@ Cflags: -I${includedir} @GL_PC_CFLAGS@ glx_tls: @GLX_TLS@ From 9ad14f71e6000249affc8e991db878e0e9e1ba19 Mon Sep 17 00:00:00 2001 From: "Juan A. Suarez Romero" Date: Mon, 4 Jun 2018 10:22:49 +0000 Subject: [PATCH 025/367] wayland/egl: initialize window surface size to window size When creating a windows surface with eglCreateWindowSurface(), the width and height returned by eglQuerySurface(EGL_{WIDTH,HEIGHT}) is invalid until buffers are updated (like calling glClear()). But according to EGL 1.5 spec, section 3.5.6 ("Surface Attributes"): "Querying EGL_WIDTH and EGL_HEIGHT returns respectively the width and height, in pixels, of the surface. 
For a window or pixmap surface, these values are initially equal to the width and height of the native window or pixmap with respect to which the surface was created" This fixes dEQP-EGL.functional.color_clears.* CTS tests v2: - Do not modify attached_{width,height} (Daniel) - Do not update size on resizing window (Brendan) CC: Daniel Stone CC: Brendan King CC: mesa-stable@lists.freedesktop.org Tested-by: Eric Engestrom Tested-by: Chad Versace Reviewed-by: Chad Versace Reviewed-by: Daniel Stone (cherry picked from commit 1fe7cbdf05b90034577dac4e4aa6157031d80521) --- src/egl/drivers/dri2/platform_wayland.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index dca099500a8..a5d43094cf3 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -258,6 +258,9 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, goto cleanup_surf; } + dri2_surf->base.Width = window->width; + dri2_surf->base.Height = window->height; + visual_idx = dri2_wl_visual_idx_from_config(dri2_dpy, config); assert(visual_idx != -1); From 7af6be8864d7d8fecd61750c43751df0989a9416 Mon Sep 17 00:00:00 2001 From: "Juan A. Suarez Romero" Date: Wed, 6 Jun 2018 10:13:05 +0000 Subject: [PATCH 026/367] wayland/egl: update surface size on window resize According to EGL 1.5 spec, section 3.10.1.1 ("Native Window Resizing"): "If the native window corresponding to _surface_ has been resized prior to the swap, _surface_ must be resized to match. _surface_ will normally be resized by the EGL implementation at the time the native window is resized. If the implementation cannot do this transparently to the client, then *eglSwapBuffers* must detect the change and resize surface prior to copying its pixels to the native window." So far, resizing a native window in Wayland/EGL was interpreted in Mesa as a request to resize, which is not executed until the first draw call. 
And hence, surface size is not updated until executing it. Thus, querying the surface size with eglQuerySurface() after a window resize still returns the old values. This commit updates the surface size values as soon as the resize is done, even when the real resize is done in the draw call. This makes the semantics that any native window resize request takes effect immediately, and if user calls eglQuerySurface() it will return the new resized values. v2: update surface size if there isn't a back surface (Daniel) CC: Daniel Stone CC: mesa-stable@lists.freedesktop.org Reviewed-by: Daniel Stone (cherry picked from commit a9fb331ea7d1a78936ea8d8385e44cfd66f835c1) --- src/egl/drivers/dri2/platform_wayland.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index a5d43094cf3..83df0a8776b 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -201,6 +201,17 @@ resize_callback(struct wl_egl_window *wl_win, void *data) struct dri2_egl_display *dri2_dpy = dri2_egl_display(dri2_surf->base.Resource.Display); + /* Update the surface size as soon as native window is resized; from user + * pov, this makes the effect that resize is done inmediately after native + * window resize, without requiring to wait until the first draw.
+ * + * A more detailed and lengthy explanation can be found at + * https://lists.freedesktop.org/archives/mesa-dev/2018-June/196474.html + */ + if (!dri2_surf->back) { + dri2_surf->base.Width = wl_win->width; + dri2_surf->base.Height = wl_win->height; + } dri2_dpy->flush->invalidate(dri2_surf->dri_drawable); } @@ -580,8 +591,8 @@ update_buffers(struct dri2_egl_surface *dri2_surf) struct dri2_egl_display *dri2_dpy = dri2_egl_display(dri2_surf->base.Resource.Display); - if (dri2_surf->base.Width != dri2_surf->wl_win->width || - dri2_surf->base.Height != dri2_surf->wl_win->height) { + if (dri2_surf->base.Width != dri2_surf->wl_win->attached_width || + dri2_surf->base.Height != dri2_surf->wl_win->attached_height) { dri2_wl_release_buffers(dri2_surf); @@ -1635,8 +1646,8 @@ swrast_update_buffers(struct dri2_egl_surface *dri2_surf) if (dri2_surf->back) return 0; - if (dri2_surf->base.Width != dri2_surf->wl_win->width || - dri2_surf->base.Height != dri2_surf->wl_win->height) { + if (dri2_surf->base.Width != dri2_surf->wl_win->attached_width || + dri2_surf->base.Height != dri2_surf->wl_win->attached_height) { dri2_wl_release_buffers(dri2_surf); From 9dacf10ca8165ca49c812ea82492dbf3e1d2a74c Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 22 Jan 2018 17:52:49 +0000 Subject: [PATCH 027/367] swr: don't export swr_create_screen_internal With earlier rework the user and provider of the symbol are within the same binary. Thus there's no point in exporting the function. Spotted while reviewing patch from Chuck, that nearly added another unneeded PUBLIC function. 
Cc: Chuck Atkins Cc: Tim Rowley Fixes: f50aa21456d ("swr: build driver proper separate from rasterizer") Signed-off-by: Emil Velikov Tested-by: Chuck Atkins Reviewed-By: George Kyriazis Tested-by: Chuck Atkins (cherry picked from commit 54d844897fe0afea4b5ddf08565af49a8191d808) --- src/gallium/drivers/swr/swr_public.h | 2 +- src/gallium/drivers/swr/swr_screen.cpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gallium/drivers/swr/swr_public.h b/src/gallium/drivers/swr/swr_public.h index 07ea6280cd6..b32f41fdf7b 100644 --- a/src/gallium/drivers/swr/swr_public.h +++ b/src/gallium/drivers/swr/swr_public.h @@ -37,7 +37,7 @@ extern "C" { struct pipe_screen *swr_create_screen(struct sw_winsys *winsys); // arch-specific dll entry point -PUBLIC struct pipe_screen *swr_create_screen_internal(struct sw_winsys *winsys); +struct pipe_screen *swr_create_screen_internal(struct sw_winsys *winsys); // cleanup for failed screen creation void swr_destroy_screen_internal(struct swr_screen **screen); diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp index 67085444f84..084f55dab99 100644 --- a/src/gallium/drivers/swr/swr_screen.cpp +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -1143,7 +1143,6 @@ swr_validate_env_options(struct swr_screen *screen) } -PUBLIC struct pipe_screen * swr_create_screen_internal(struct sw_winsys *winsys) { From 1378f33142123f6f5b96ce1d4ac5e8e2440ea6a2 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 7 Aug 2018 12:15:03 -0700 Subject: [PATCH 028/367] vc4: Fix vc4_fence_server_sync() on pre-syncobj kernels. We won't have an FD if we're just having the server wait on a fence created by eglCreateSyncKHR(). Our seqno fences will happen in order, so server-side waits are no-ops in that case.
Fixes dEQP-EGL.functional.sharing.gles2.multithread.simple_egl_server_sync.buffers.gen_delete Fixes: b0acc3a5628c ("broadcom/vc4: Native fence fd support") (cherry picked from commit cfe69d0aaafadaaaca98517bc33307fba68197ba) --- src/gallium/drivers/vc4/vc4_fence.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_fence.c b/src/gallium/drivers/vc4/vc4_fence.c index fac9df34d4f..0dbfbe966b8 100644 --- a/src/gallium/drivers/vc4/vc4_fence.c +++ b/src/gallium/drivers/vc4/vc4_fence.c @@ -121,7 +121,8 @@ vc4_fence_server_sync(struct pipe_context *pctx, struct vc4_context *vc4 = vc4_context(pctx); struct vc4_fence *fence = vc4_fence(pfence); - sync_accumulate("vc4", &vc4->in_fence_fd, fence->fd); + if (fence->fd >= 0) + sync_accumulate("vc4", &vc4->in_fence_fd, fence->fd); } static int From 6606cacd3d96a0b87a3ce6ee7d85b37eec37caea Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Wed, 8 Aug 2018 15:26:32 +0100 Subject: [PATCH 029/367] intel/tools: add missing variable initialisation Fixes: 6a60beba4089315685b8 "intel/tools: Add an error state to aub translator" Signed-off-by: Eric Engestrom Reviewed-by: Lionel Landwerlin (cherry picked from commit aac80f75973b61a8a31f873a9de6bcf294ea493c) --- src/intel/tools/error2aub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/tools/error2aub.c b/src/intel/tools/error2aub.c index 3407dcec0b7..2030593691c 100644 --- a/src/intel/tools/error2aub.c +++ b/src/intel/tools/error2aub.c @@ -205,7 +205,7 @@ main(int argc, char *argv[]) BO_TYPE_UNKNOWN = 0, BO_TYPE_BATCH, BO_TYPE_USER, - } bo_type; + } bo_type = BO_TYPE_UNKNOWN; uint64_t bo_addr; char *line = NULL; From 8be5985e65bbcb070882db4a59fc0bf79c13581f Mon Sep 17 00:00:00 2001 From: "vadym.shovkoplias" Date: Mon, 6 Aug 2018 15:52:13 +0300 Subject: [PATCH 030/367] drirc: Allow extension midshader for Metro Redux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes 
both Metro 2033 Redux and Metro Last Light Redux Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99730 Signed-off-by: Eero Tamminen Signed-off-by: Vadym Shovkoplias Reviewed-by: Tapani Pälli (cherry picked from commit e0de26eacc93f431962533f50d57e58335843d6b) --- src/util/drirc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/util/drirc b/src/util/drirc index 8ece875e34f..c4f9e060f3a 100644 --- a/src/util/drirc +++ b/src/util/drirc @@ -120,6 +120,10 @@ TODO: document the other workarounds.