2016-03-03 08:46:38 +05:30
|
|
|
// Copyright 2016 Citra Emulator Project
|
|
|
|
// Licensed under GPLv2 or any later version
|
|
|
|
// Refer to the license.txt file included.
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
2016-04-30 21:04:51 +05:30
|
|
|
#include <array>
|
2019-08-07 21:38:52 +05:30
|
|
|
#include "boost/serialization/split_member.hpp"
|
2016-04-30 21:04:51 +05:30
|
|
|
#include "common/bit_field.h"
|
|
|
|
#include "common/common_types.h"
|
2017-04-17 12:31:45 +05:30
|
|
|
#include "common/vector_math.h"
|
pica/command_processor: build geometry pipeline and run geometry shader
The geometry pipeline manages data transfer between the VS, the GS and the primitive assembler. It has four known modes:
- no GS mode: sends VS output directly to the primitive assembler (what citra currently does)
- GS mode 0: sends VS output to GS input registers, and sends GS output to primitive assembler
- GS mode 1: sends VS output to GS uniform registers, and sends GS output to primitive assembler. It also takes an index from the index buffer at the beginning of each primitive to determine the primitive size.
- GS mode 2: similar to mode 1, but doesn't take the index and uses a fixed primitive size.
hwtest shows that immediate mode also supports GS (at least for mode 0), so the geometry pipeline is refactored into its own class to support both drawing modes.
In the immediate mode, some games don't set the pipeline registers to a valid value until the first attribute input, so a geometry pipeline reset flag is set in `pipeline.vs_default_attributes_setup.index` trigger, and the actual pipeline reconfigure is triggered in the first attribute input.
In the normal drawing mode with index buffer, the vertex cache is a little bit modified to support the geometry pipeline. Instead of OutputVertex, it now holds AttributeBuffer, which is the input to the geometry pipeline. The AttributeBuffer->OutputVertex conversion is done inside the pipeline vertex handler. The actual hardware vertex cache is believed to be implemented in a similar way (because this is the only way that makes sense).
Both geometry pipeline and GS unit rely on states preservation across drawing call, so they are put into the global state. In the future, the other three vertex shader units should be also placed in the global state, and a scheduler should be implemented on top of the four units. Note that the current gs_unit already allows running VS on it in the future.
2017-08-04 19:33:17 +05:30
|
|
|
#include "video_core/geometry_pipeline.h"
|
2016-03-03 08:46:38 +05:30
|
|
|
#include "video_core/primitive_assembly.h"
|
2017-01-29 02:57:24 +05:30
|
|
|
#include "video_core/regs.h"
|
2016-03-03 08:46:38 +05:30
|
|
|
#include "video_core/shader/shader.h"
|
|
|
|
|
2019-08-07 21:38:52 +05:30
|
|
|
// NB, by defining this we can't use the built-in std::array serializer in this file
|
|
|
|
namespace boost::serialization {
|
|
|
|
|
|
|
|
template<class Archive, typename Value, size_t Size>
|
|
|
|
void serialize(Archive & ar, std::array<Value, Size> &array, const unsigned int version)
|
|
|
|
{
|
|
|
|
static_assert(sizeof(Value) == sizeof(u32));
|
|
|
|
ar & *static_cast<u32 (*)[Size]>(static_cast<void *>(array.data()));
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2016-03-03 08:46:38 +05:30
|
|
|
namespace Pica {
|
|
|
|
|
|
|
|
/// Struct used to describe current Pica state
|
|
|
|
struct State {
|
pica/command_processor: build geometry pipeline and run geometry shader
The geometry pipeline manages data transfer between VS, GS and primitive assembler. It has known four modes:
- no GS mode: sends VS output directly to the primitive assembler (what citra currently does)
- GS mode 0: sends VS output to GS input registers, and sends GS output to primitive assembler
- GS mode 1: sends VS output to GS uniform registers, and sends GS output to primitive assembler. It also takes an index from the index buffer at the beginning of each primitive for determine the primitive size.
- GS mode 2: similar to mode 1, but doesn't take the index and uses a fixed primitive size.
hwtest shows that immediate mode also supports GS (at least for mode 0), so the geometry pipeline gets refactored into its own class for supporting both drawing mode.
In the immediate mode, some games don't set the pipeline registers to a valid value until the first attribute input, so a geometry pipeline reset flag is set in `pipeline.vs_default_attributes_setup.index` trigger, and the actual pipeline reconfigure is triggered in the first attribute input.
In the normal drawing mode with index buffer, the vertex cache is a little bit modified to support the geometry pipeline. Instead of OutputVertex, it now holds AttributeBuffer, which is the input to the geometry pipeline. The AttributeBuffer->OutputVertex conversion is done inside the pipeline vertex handler. The actual hardware vertex cache is believed to be implemented in a similar way (because this is the only way that makes sense).
Both geometry pipeline and GS unit rely on states preservation across drawing call, so they are put into the global state. In the future, the other three vertex shader units should be also placed in the global state, and a scheduler should be implemented on top of the four units. Note that the current gs_unit already allows running VS on it in the future.
2017-08-04 19:33:17 +05:30
|
|
|
State();
|
2016-03-06 04:19:23 +05:30
|
|
|
void Reset();
|
|
|
|
|
2016-03-03 08:46:38 +05:30
|
|
|
/// Pica registers
|
|
|
|
Regs regs;
|
|
|
|
|
|
|
|
Shader::ShaderSetup vs;
|
|
|
|
Shader::ShaderSetup gs;
|
|
|
|
|
2016-12-19 06:12:19 +05:30
|
|
|
Shader::AttributeBuffer input_default_attributes;
|
2016-05-12 13:36:35 +05:30
|
|
|
|
2017-04-17 12:31:45 +05:30
|
|
|
struct ProcTex {
|
|
|
|
union ValueEntry {
|
|
|
|
u32 raw;
|
|
|
|
|
|
|
|
// LUT value, encoded as 12-bit fixed point, with 12 fraction bits
|
|
|
|
BitField<0, 12, u32> value; // 0.0.12 fixed point
|
|
|
|
|
|
|
|
// Difference between two entry values. Used for efficient interpolation.
|
|
|
|
// 0.0.12 fixed point with two's complement. The range is [-0.5, 0.5).
|
|
|
|
// Note: the type of this is different from the one of lighting LUT
|
|
|
|
BitField<12, 12, s32> difference;
|
|
|
|
|
|
|
|
float ToFloat() const {
|
|
|
|
return static_cast<float>(value) / 4095.f;
|
|
|
|
}
|
|
|
|
|
|
|
|
float DiffToFloat() const {
|
|
|
|
return static_cast<float>(difference) / 4095.f;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
union ColorEntry {
|
|
|
|
u32 raw;
|
|
|
|
BitField<0, 8, u32> r;
|
|
|
|
BitField<8, 8, u32> g;
|
|
|
|
BitField<16, 8, u32> b;
|
|
|
|
BitField<24, 8, u32> a;
|
|
|
|
|
2019-02-27 09:08:34 +05:30
|
|
|
Common::Vec4<u8> ToVector() const {
|
2017-04-17 12:31:45 +05:30
|
|
|
return {static_cast<u8>(r), static_cast<u8>(g), static_cast<u8>(b),
|
|
|
|
static_cast<u8>(a)};
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
union ColorDifferenceEntry {
|
|
|
|
u32 raw;
|
|
|
|
BitField<0, 8, s32> r; // half of the difference between two ColorEntry
|
|
|
|
BitField<8, 8, s32> g;
|
|
|
|
BitField<16, 8, s32> b;
|
|
|
|
BitField<24, 8, s32> a;
|
|
|
|
|
2019-02-27 09:08:34 +05:30
|
|
|
Common::Vec4<s32> ToVector() const {
|
|
|
|
return Common::Vec4<s32>{r, g, b, a} * 2;
|
2017-04-17 12:31:45 +05:30
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
std::array<ValueEntry, 128> noise_table;
|
|
|
|
std::array<ValueEntry, 128> color_map_table;
|
|
|
|
std::array<ValueEntry, 128> alpha_map_table;
|
|
|
|
std::array<ColorEntry, 256> color_table;
|
|
|
|
std::array<ColorDifferenceEntry, 256> color_diff_table;
|
2019-08-07 21:38:52 +05:30
|
|
|
|
|
|
|
private:
|
|
|
|
friend class boost::serialization::access;
|
|
|
|
template<class Archive>
|
|
|
|
void serialize(Archive & ar, const unsigned int file_version)
|
|
|
|
{
|
|
|
|
ar & noise_table;
|
|
|
|
ar & color_map_table;
|
|
|
|
ar & alpha_map_table;
|
|
|
|
ar & color_table;
|
|
|
|
ar & color_diff_table;
|
|
|
|
}
|
2017-04-17 12:31:45 +05:30
|
|
|
} proctex;
|
|
|
|
|
2017-06-28 23:04:16 +05:30
|
|
|
struct Lighting {
|
2016-03-03 08:46:38 +05:30
|
|
|
union LutEntry {
|
|
|
|
// Used for raw access
|
|
|
|
u32 raw;
|
|
|
|
|
|
|
|
// LUT value, encoded as 12-bit fixed point, with 12 fraction bits
|
2016-09-18 06:08:01 +05:30
|
|
|
BitField<0, 12, u32> value; // 0.0.12 fixed point
|
2016-03-03 08:46:38 +05:30
|
|
|
|
2017-06-16 16:30:15 +05:30
|
|
|
// Used for efficient interpolation.
|
|
|
|
BitField<12, 11, u32> difference; // 0.0.11 fixed point
|
|
|
|
BitField<23, 1, u32> neg_difference;
|
2016-03-03 08:46:38 +05:30
|
|
|
|
2017-06-16 16:30:15 +05:30
|
|
|
float ToFloat() const {
|
2016-03-03 08:46:38 +05:30
|
|
|
return static_cast<float>(value) / 4095.f;
|
|
|
|
}
|
2017-06-16 16:30:15 +05:30
|
|
|
|
|
|
|
float DiffToFloat() const {
|
|
|
|
float diff = static_cast<float>(difference) / 2047.f;
|
|
|
|
return neg_difference ? -diff : diff;
|
|
|
|
}
|
2019-08-07 21:38:52 +05:30
|
|
|
|
|
|
|
template<class Archive>
|
|
|
|
void serialize(Archive & ar, const unsigned int file_version)
|
|
|
|
{
|
|
|
|
ar & raw;
|
|
|
|
}
|
2016-03-03 08:46:38 +05:30
|
|
|
};
|
|
|
|
|
|
|
|
std::array<std::array<LutEntry, 256>, 24> luts;
|
|
|
|
} lighting;
|
|
|
|
|
2016-05-11 17:09:28 +05:30
|
|
|
struct {
|
|
|
|
union LutEntry {
|
|
|
|
// Used for raw access
|
|
|
|
u32 raw;
|
|
|
|
|
2016-09-18 06:08:01 +05:30
|
|
|
BitField<0, 13, s32> difference; // 1.1.11 fixed point
|
|
|
|
BitField<13, 11, u32> value; // 0.0.11 fixed point
|
2017-06-22 19:52:45 +05:30
|
|
|
|
|
|
|
float ToFloat() const {
|
|
|
|
return static_cast<float>(value) / 2047.0f;
|
|
|
|
}
|
|
|
|
|
|
|
|
float DiffToFloat() const {
|
|
|
|
return static_cast<float>(difference) / 2047.0f;
|
|
|
|
}
|
2016-05-11 17:09:28 +05:30
|
|
|
};
|
|
|
|
|
|
|
|
std::array<LutEntry, 128> lut;
|
|
|
|
} fog;
|
|
|
|
|
2019-08-07 21:38:52 +05:30
|
|
|
#undef SERIALIZE_RAW
|
|
|
|
|
2016-03-03 08:46:38 +05:30
|
|
|
/// Current Pica command list
|
|
|
|
struct {
|
2019-08-07 21:38:52 +05:30
|
|
|
PAddr addr; // This exists only for serialization
|
2016-03-03 08:46:38 +05:30
|
|
|
const u32* head_ptr;
|
|
|
|
const u32* current_ptr;
|
|
|
|
u32 length;
|
|
|
|
} cmd_list;
|
|
|
|
|
|
|
|
/// Struct used to describe immediate mode rendering state
|
|
|
|
struct ImmediateModeState {
|
2016-03-06 04:19:23 +05:30
|
|
|
// Used to buffer partial vertices for immediate-mode rendering.
|
2016-12-19 06:12:19 +05:30
|
|
|
Shader::AttributeBuffer input_vertex;
|
2016-03-06 04:19:23 +05:30
|
|
|
// Index of the next attribute to be loaded into `input_vertex`.
|
2016-04-26 01:40:03 +05:30
|
|
|
u32 current_attribute = 0;
|
pica/command_processor: build geometry pipeline and run geometry shader
The geometry pipeline manages data transfer between VS, GS and primitive assembler. It has known four modes:
- no GS mode: sends VS output directly to the primitive assembler (what citra currently does)
- GS mode 0: sends VS output to GS input registers, and sends GS output to primitive assembler
- GS mode 1: sends VS output to GS uniform registers, and sends GS output to primitive assembler. It also takes an index from the index buffer at the beginning of each primitive for determine the primitive size.
- GS mode 2: similar to mode 1, but doesn't take the index and uses a fixed primitive size.
hwtest shows that immediate mode also supports GS (at least for mode 0), so the geometry pipeline gets refactored into its own class for supporting both drawing mode.
In the immediate mode, some games don't set the pipeline registers to a valid value until the first attribute input, so a geometry pipeline reset flag is set in `pipeline.vs_default_attributes_setup.index` trigger, and the actual pipeline reconfigure is triggered in the first attribute input.
In the normal drawing mode with index buffer, the vertex cache is a little bit modified to support the geometry pipeline. Instead of OutputVertex, it now holds AttributeBuffer, which is the input to the geometry pipeline. The AttributeBuffer->OutputVertex conversion is done inside the pipeline vertex handler. The actual hardware vertex cache is believed to be implemented in a similar way (because this is the only way that makes sense).
Both geometry pipeline and GS unit rely on states preservation across drawing call, so they are put into the global state. In the future, the other three vertex shader units should be also placed in the global state, and a scheduler should be implemented on top of the four units. Note that the current gs_unit already allows running VS on it in the future.
2017-08-04 19:33:17 +05:30
|
|
|
// Indicates the immediate mode just started and the geometry pipeline needs to reconfigure
|
|
|
|
bool reset_geometry_pipeline = true;
|
2019-08-07 21:38:52 +05:30
|
|
|
|
|
|
|
private:
|
|
|
|
friend class boost::serialization::access;
|
|
|
|
template<class Archive>
|
|
|
|
void serialize(Archive & ar, const unsigned int file_version)
|
|
|
|
{
|
|
|
|
// ar & input_vertex;
|
|
|
|
ar & current_attribute;
|
|
|
|
ar & reset_geometry_pipeline;
|
|
|
|
}
|
|
|
|
|
2016-03-03 08:46:38 +05:30
|
|
|
} immediate;
|
2016-03-06 04:19:23 +05:30
|
|
|
|
pica/command_processor: build geometry pipeline and run geometry shader
The geometry pipeline manages data transfer between VS, GS and primitive assembler. It has known four modes:
- no GS mode: sends VS output directly to the primitive assembler (what citra currently does)
- GS mode 0: sends VS output to GS input registers, and sends GS output to primitive assembler
- GS mode 1: sends VS output to GS uniform registers, and sends GS output to primitive assembler. It also takes an index from the index buffer at the beginning of each primitive for determine the primitive size.
- GS mode 2: similar to mode 1, but doesn't take the index and uses a fixed primitive size.
hwtest shows that immediate mode also supports GS (at least for mode 0), so the geometry pipeline gets refactored into its own class for supporting both drawing mode.
In the immediate mode, some games don't set the pipeline registers to a valid value until the first attribute input, so a geometry pipeline reset flag is set in `pipeline.vs_default_attributes_setup.index` trigger, and the actual pipeline reconfigure is triggered in the first attribute input.
In the normal drawing mode with index buffer, the vertex cache is a little bit modified to support the geometry pipeline. Instead of OutputVertex, it now holds AttributeBuffer, which is the input to the geometry pipeline. The AttributeBuffer->OutputVertex conversion is done inside the pipeline vertex handler. The actual hardware vertex cache is believed to be implemented in a similar way (because this is the only way that makes sense).
Both geometry pipeline and GS unit rely on states preservation across drawing call, so they are put into the global state. In the future, the other three vertex shader units should be also placed in the global state, and a scheduler should be implemented on top of the four units. Note that the current gs_unit already allows running VS on it in the future.
2017-08-04 19:33:17 +05:30
|
|
|
// the geometry shader needs to be kept in the global state because some shaders relie on
|
|
|
|
// preserved register value across shader invocation.
|
|
|
|
// TODO: also bring the three vertex shader units here and implement the shader scheduler.
|
|
|
|
Shader::GSUnitState gs_unit;
|
|
|
|
|
|
|
|
GeometryPipeline geometry_pipeline;
|
|
|
|
|
2016-03-06 04:19:23 +05:30
|
|
|
// This is constructed with a dummy triangle topology
|
|
|
|
PrimitiveAssembler<Shader::OutputVertex> primitive_assembler;
|
2019-06-20 04:09:08 +05:30
|
|
|
|
|
|
|
int vs_float_regs_counter = 0;
|
|
|
|
u32 vs_uniform_write_buffer[4]{};
|
|
|
|
|
|
|
|
int gs_float_regs_counter = 0;
|
|
|
|
u32 gs_uniform_write_buffer[4]{};
|
|
|
|
|
|
|
|
int default_attr_counter = 0;
|
|
|
|
u32 default_attr_write_buffer[3]{};
|
2019-08-07 21:38:52 +05:30
|
|
|
|
|
|
|
private:
|
|
|
|
|
|
|
|
friend class boost::serialization::access;
|
|
|
|
template<class Archive>
|
|
|
|
void serialize(Archive & ar, const unsigned int file_version)
|
|
|
|
{
|
|
|
|
ar & regs.reg_array;
|
|
|
|
// ar & vs;
|
|
|
|
// ar & gs;
|
|
|
|
// ar & input_default_attributes;
|
|
|
|
ar & proctex;
|
|
|
|
for (auto i = 0; i < lighting.luts.size(); i++) {
|
|
|
|
ar & lighting.luts[i];
|
|
|
|
}
|
|
|
|
ar & fog.lut;
|
|
|
|
ar & cmd_list.addr;
|
|
|
|
ar & cmd_list.length;
|
|
|
|
ar & immediate;
|
|
|
|
// ar & gs_unit;
|
|
|
|
// ar & geometry_pipeline;
|
|
|
|
// ar & primitive_assembler;
|
|
|
|
ar & vs_float_regs_counter;
|
|
|
|
ar & vs_uniform_write_buffer;
|
|
|
|
ar & gs_float_regs_counter;
|
|
|
|
ar & gs_uniform_write_buffer;
|
|
|
|
ar & default_attr_counter;
|
|
|
|
ar & default_attr_write_buffer;
|
|
|
|
boost::serialization::split_member(ar, *this, file_version);
|
|
|
|
}
|
|
|
|
|
|
|
|
template<class Archive>
|
|
|
|
void save(Archive & ar, const unsigned int file_version) const
|
|
|
|
{
|
|
|
|
ar << static_cast<u32>(cmd_list.current_ptr - cmd_list.head_ptr);
|
|
|
|
}
|
|
|
|
|
|
|
|
template<class Archive>
|
|
|
|
void load(Archive & ar, const unsigned int file_version)
|
|
|
|
{
|
|
|
|
u32 offset{};
|
|
|
|
ar >> offset;
|
|
|
|
cmd_list.head_ptr = (u32*)VideoCore::g_memory->GetPhysicalPointer(cmd_list.addr);
|
|
|
|
cmd_list.current_ptr = cmd_list.head_ptr + offset;
|
|
|
|
}
|
2016-03-03 08:46:38 +05:30
|
|
|
};
|
|
|
|
|
|
|
|
extern State g_state; ///< Current Pica state
|
|
|
|
|
2018-03-09 23:24:43 +05:30
|
|
|
} // namespace Pica
|