[英]How to fix long compilation for Verilog HDL in quartus
我一直在尝试用 Verilog HDL 实现计数排序算法,但是编译时 Quartus 需要花费非常长的时间。我不知道问题出在哪里。
// Original attempt from the question: a counting sort over ten 2-bit inputs,
// written as for-loops inside a single clocked always block.
// Ports: reset and clk, ten 2-bit inputs data_in0..data_in9 and ten 2-bit
// registered outputs data_out0..data_out9.
module sort(reset, clk, data_in0,data_in1,data_in2,data_in3,data_in4,data_in5,data_in6,data_in7,data_in8,data_in9, data_out0, data_out1, data_out2, data_out3, data_out4, data_out5, data_out6, data_out7, data_out8, data_out9);
input wire reset, clk;
input wire [1:0] data_in0;
input wire [1:0] data_in1;
input wire [1:0] data_in2;
input wire [1:0] data_in3;
input wire [1:0] data_in4;
input wire [1:0] data_in5;
input wire [1:0] data_in6;
input wire [1:0] data_in7;
input wire [1:0] data_in8;
input wire [1:0] data_in9;
output reg [1:0] data_out0;
output reg [1:0] data_out1;
output reg [1:0] data_out2;
output reg [1:0] data_out3;
output reg [1:0] data_out4;
output reg [1:0] data_out5;
output reg [1:0] data_out6;
output reg [1:0] data_out7;
output reg [1:0] data_out8;
output reg [1:0] data_out9;
// mem: ten 2-bit entries, loaded from the inputs and later rewritten in place.
reg [1:0] mem [9:0];
// buff: four 10-bit entries, indexed by a data value taken from mem.
reg[9:0] buff [3:0];
// i, j, k are loop/index variables; f and s are used as "already done" flags.
integer i,k,j,f,s;
// All of the logic below is evaluated on every rising clock edge.
always@ (posedge clk)
begin
// Nonblocking clear of all four buff entries, scheduled on every edge.
// A later nonblocking assignment to the same entry on the same edge wins.
for(i=0; i<4; i=i+1)
buff[i]<=0;
if (reset == 1) begin
// Synchronous reset: clear mem and both flags.
for (i = 0; i < 10; i = i + 1) mem[i]<=0;
s=0;
f=0;
end
else begin
// First non-reset edge only (f == 0): capture the ten inputs into mem.
if (f==0)begin
mem [0] <= data_in0;
mem[1]<=data_in1;
mem[2]<=data_in2;
mem[3]<=data_in3;
mem[4]<=data_in4;
mem[5]<=data_in5;
mem[6]<=data_in6;
mem[7]<=data_in7;
mem[8]<=data_in8;
mem[9]<=data_in9;
f=1;
end
// Intended as the histogram step. The assignment is nonblocking, so the
// right-hand side reads buff and mem as they were before this clock edge;
// repeated hits on the same buff entry therefore do not accumulate within
// one edge.
for( i = 0; i <10 ; i=i+1)
begin
buff[mem[i]]<=buff[mem[i]]+1;
end
// Intended as the write-back step, executed only while s == 0.
// k is written only with nonblocking assignments, so every read of k in
// this loop nest sees its value from before the clock edge; k does not
// advance from one iteration to the next.
// NOTE(review): the 4 x 10 nested loop with a variable index into mem is
// presumably what makes synthesis so slow - see the discussion that
// follows this listing.
if(s==0) begin
k<=0;
for( i = 0; i <4 ; i=i+1)
begin
for( j = 0; j < 10 ; j = j +1)
begin
if(j<buff[i])
begin
mem[k]<=i;
k<=k+1;
end
end
end
// s is set on every non-reset edge, independent of the if (s==0) above.
end s=1;
// Blocking copies of mem to the outputs. mem is only ever written with
// nonblocking assignments in this block, so these reads see mem as it was
// before this clock edge.
data_out0 = mem[0];
data_out1 = mem[1];
data_out2 = mem[2];
data_out3 = mem[3];
data_out4 = mem[4];
data_out5 = mem[5];
data_out6 = mem[6];
data_out7 = mem[7];
data_out8 = mem[8];
data_out9 = mem[9];
end
end
endmodule
“分析和综合”(Analysis & Synthesis)这一步需要花费很长时间才能完成。我认为这是由代码中的错误或者对运算符的错误使用引起的,但我不知道问题的确切位置。
Verilog 中的 for 循环并不会按您期望的方式工作。它不会逐步(逐个时钟周期)执行;综合工具会尝试把循环展开,而且由于所有内容都包含在 always @(posedge clk) 中,展开后的所有语句都会在同一个时钟周期内完成。请使用状态机重新设计该模块,以实现顺序执行。
这是解决问题的基于FSM的解决方案的一个示例。 虽然可以进行很大的改进,但这只是一个起点(并希望能起作用)。
首先,我修改了您的模块接口。使用分立的输入端口也是可以的,但是由于该算法需要用索引遍历整个输入数据,因此我假设有两个外部存储器:一个存放输入数据,另一个用于存放输出数据。该模块驱动这两个存储器各自的地址总线,以及写使能信号和数据总线。另外还有一个 busy 信号,让系统的其余部分知道该模块尚未完成数据排序。最后,我让它对 16 个数字进行排序,而不是 10 个。
在内部,我使用了一个存储元素 count,作为保存输入数据直方图的向量。由于这个存储器很小,我把它实现为四个独立的寄存器。这样我就可以在同一个时钟周期内使用 count 的多个元素,例如 count[3] <= count[3] + count[2] + count[1] + count[0];
我使用的算法版本来自 Wikipedia:https://en.wikipedia.org/wiki/Counting_sort
function countingSort(array, k) is
count ← new array of k zeros
for i = 1 to length(array) do
count[array[i]] ← count[array[i]] + 1
for i = 2 to k do
count[i] ← count[i] + count[i - 1]
for i = length(array) downto 1 do
output[count[array[i]]] ← array[i]
count[array[i]] ← count[array[i]] - 1
return output
这是Verilog模块:
// Counting sort of 16 two-bit values, sequenced by a small state machine.
//
// The module reads its input from an external memory (addr_data_in/data_in)
// and writes the sorted result to a second external memory
// (addr_data_out/data_out, qualified by write_data_out_strobe).
//
//   clk                   : clock; all state changes on the rising edge
//   reset                 : synchronous, active high; (re)starts a sort
//   addr_data_in          : read address into the input memory
//   data_in               : value read at addr_data_in; it is sampled one
//                           clock after the address is registered, so the
//                           memory must present it without a clocked delay
//   addr_data_out         : write address into the output memory
//   data_out              : value to store at addr_data_out
//   write_data_out_strobe : high for one clock while addr_data_out/data_out
//                           hold a valid write
//   busy                  : set by reset, cleared once the FSM reaches IDLE
module sort (
    input  wire       clk,
    input  wire       reset,
    output reg  [3:0] addr_data_in,
    input  wire [1:0] data_in,
    output reg  [3:0] addr_data_out,
    output reg  [1:0] data_out,
    output reg        write_data_out_strobe,
    output reg        busy
    );

    /*
    function countingSort(array, k) is
      count <- new array of k zeros
      for i = 1 to length(array) do
        count[array[i]] <- count[array[i]] + 1
      for i = 2 to k do
        count[i] <- count[i] + count[i - 1]
      for i = length(array) downto 1 do
        output[count[array[i]]] <- array[i]
        count[array[i]] <- count[array[i]] - 1
      return output
    */

    localparam
        ZERO         = 3'd0,
        MAKEHIST1    = 3'd1,
        MAKEHIST2    = 3'd2,
        PREFIXSUM    = 3'd3,
        PLACEOUTPUT1 = 3'd4,
        PLACEOUTPUT2 = 3'd5,
        IDLE         = 3'd7
        ;

    // One counter per possible 2-bit value. Five bits are needed because a
    // counter can reach 16 (all inputs equal, or the last prefix sum).
    reg [4:0] count[0:3];
    reg [2:0] state = IDLE;
    // Input sample latched in the first state of each two-state step.
    reg [1:0] data;

    always @(posedge clk) begin
        if (reset == 1'b1) begin
            state <= ZERO;
            write_data_out_strobe <= 1'b0;
            busy <= 1'b1;
        end
        else begin
            case (state)
                ZERO:
                    // count <- new array of k zeros
                    begin
                        count[0] <= 5'd0;
                        count[1] <= 5'd0;
                        count[2] <= 5'd0;
                        count[3] <= 5'd0;
                        addr_data_in <= 4'd0;
                        state <= MAKEHIST1;
                    end

                MAKEHIST1:
                    // for i = 1 to length(array) do
                    //   count[array[i]] <- count[array[i]] + 1
                    // First half: latch the current input and advance the
                    // read address.
                    begin
                        data <= data_in;
                        addr_data_in <= addr_data_in + 4'd1;
                        state <= MAKEHIST2;
                    end

                MAKEHIST2:
                    // Second half: bump the counter of the latched value.
                    // The 4-bit address wraps to 0 after element 15, which
                    // marks the end of the pass.
                    begin
                        count[data] <= count[data] + 5'd1;
                        if (addr_data_in == 4'd0)
                            state <= PREFIXSUM;
                        else
                            state <= MAKEHIST1;
                    end

                PREFIXSUM:
                    // for i = 2 to k do
                    //   count[i] <- count[i] + count[i - 1]
                    // All right-hand sides read the pre-edge histogram, so
                    // the whole prefix sum is done in a single clock.
                    begin
                        count[1] <= count[1] + count[0];
                        count[2] <= count[2] + count[1] + count[0];
                        count[3] <= count[3] + count[2] + count[1] + count[0];
                        addr_data_in <= 4'd15;
                        state <= PLACEOUTPUT1;
                    end

                PLACEOUTPUT1:
                    // for i = length(array) downto 1 do
                    //   output[count[array[i]]] <- array[i]
                    //   count[array[i]] <- count[array[i]] - 1
                    // First half: latch the current input and step the read
                    // address downwards.
                    begin
                        data <= data_in;
                        addr_data_in <= addr_data_in - 4'd1;
                        write_data_out_strobe <= 1'b0;
                        state <= PLACEOUTPUT2;
                    end

                PLACEOUTPUT2:
                    // Second half: emit the write. count[data] is a 1-based
                    // position, hence the -1 for the 0-based output address.
                    // The read address wraps to 15 after element 0.
                    begin
                        addr_data_out <= count[data] - 5'd1;
                        data_out <= data;
                        count[data] <= count[data] - 5'd1;
                        write_data_out_strobe <= 1'b1;
                        if (addr_data_in == 4'd15)
                            state <= IDLE;
                        else
                            state <= PLACEOUTPUT1;
                    end

                IDLE:
                    begin
                        write_data_out_strobe <= 1'b0;
                        busy <= 1'b0;
                    end

                default:
                    // 3'd6 is not a defined state. Recover into IDLE instead
                    // of hanging with busy stuck at its last value.
                    begin
                        write_data_out_strobe <= 1'b0;
                        state <= IDLE;
                    end
            endcase
        end // of else
    end // of always
endmodule
您可以看到,由于我使用 count 的方式,这肯定会生成很多多路复用器和译码器,因为在某些地方我用寄存器的值作为 count[] 的地址。但是,我认为综合速度会快得多。仅供参考,Yosys 可以在几秒钟内完成综合。
此外,这里有一个用于上述模块的测试台:
// Testbench for the FSM-based counting sort module `sort`.
// It models the two external memories as arrays, runs one sort of 16 values
// and prints the sorted result on a single line.
module tb_counting_sort;
    reg clk, reset;
    wire [3:0] addr_data_in, addr_data_out;
    wire [1:0] data_in, data_out;
    wire write_data_out_strobe, busy;

    sort uut (
        .clk(clk),
        .reset(reset),
        .addr_data_in(addr_data_in),
        .data_in(data_in),
        .addr_data_out(addr_data_out),
        .data_out(data_out),
        .write_data_out_strobe(write_data_out_strobe),
        .busy(busy)
    );

    reg [1:0] vector_in[0:15];   // input memory, read combinationally
    reg [1:0] vector_out[0:15];  // output memory, written on the strobe

    assign data_in = vector_in[addr_data_in];

    always @(posedge clk)
        if (write_data_out_strobe == 1'b1)
            vector_out[addr_data_out] <= data_out;

    integer i;
    initial begin
        vector_in[0]  = 2'd2;
        vector_in[1]  = 2'd1;
        vector_in[2]  = 2'd0;
        vector_in[3]  = 2'd0;
        vector_in[4]  = 2'd3;
        vector_in[5]  = 2'd1;
        vector_in[6]  = 2'd0;
        vector_in[7]  = 2'd2;
        vector_in[8]  = 2'd1;
        vector_in[9]  = 2'd1;
        vector_in[10] = 2'd3;
        vector_in[11] = 2'd3;
        vector_in[12] = 2'd3;
        vector_in[13] = 2'd2;
        vector_in[14] = 2'd1;
        vector_in[15] = 2'd0;

        reset = 1'b1;
        clk = 1'b0;
        repeat (2)
            @(posedge clk);
        // Release reset away from the active clock edge so the DUT never
        // samples it while it is changing.
        @(negedge clk);
        reset = 1'b0;

        @(negedge busy);
        // busy falls in the same time step in which the last result is still
        // being written into vector_out; wait half a clock before reading.
        @(negedge clk);
        for (i=0;i<16;i=i+1)
            $write ("%1d ", vector_out[i]);
        $display("");
        $finish;
    end

    // Delay first, then invert: clk is read at t=5, after the initial block
    // has set it to 0. The form `clk = #5 ~clk` samples clk at time 0 and can
    // latch X if it runs before the initial block.
    always begin
        #5 clk = ~clk;
    end
endmodule
可以在 EDA Playground 上查看、仿真或综合这两个模块,网址为:https://www.edaplayground.com/x/6GLj
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.