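1. On an FPGA device, a 32-bit floating-point multiplier is implemented using three 18-bit x 18-bit hardware multipliers.
2. Compared with the Altera floating-point multiplier IP:
(1) only about half as many LEs (logic elements) are used;
(2) the accuracy is nearly the same.
The algorithm is simulated in C/C++ (built with VS2013).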
/////////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <ios>
#include <iostream>
typedef unsigned int uint;
typedef unsigned long long uint64;
typedef long long int64;
// Extracts a contiguous bit field from the raw encoding of a float.
//
//   v      - value whose bit pattern is inspected (never modified)
//   nstart - index of the lowest bit of the field (0 = least significant)
//   cnt    - number of bits in the field
//
// Returns the field right-aligned, i.e. bit `nstart` of the encoding ends up
// in bit 0 of the result. A non-positive `cnt` or an `nstart` outside the
// word yields 0, and a field that would run past the top bit is clamped to
// the bits that exist, so no shift is ever performed with an out-of-range
// amount.
//
// The return type is spelled `unsigned int`, which is the same type as the
// file's `uint` alias.
unsigned int GetBits(const float& v, int nstart, int cnt){
    unsigned int raw = 0;
    memcpy(&raw, &v, sizeof(raw));

    const int wordBits = int(sizeof(raw) * 8);
    if(cnt <= 0 || nstart < 0 || nstart >= wordBits)
        return 0;
    if(cnt > wordBits - nstart)
        cnt = wordBits - nstart;

    // A full-width field needs special care: shifting by the word size is
    // undefined, so the all-ones mask is produced directly.
    const unsigned int mask = (cnt == wordBits) ? ~0u : ((1u << cnt) - 1u);
    return (raw >> nstart) & mask;
}
// Returns the index of the most significant set bit of v (0 = least
// significant bit), or -1 when no bit is set.
int64 HighestOne(int64 v){
    // Walk down from the top bit until a set bit is met or the word is exhausted.
    int pos = sizeof(v)* 8 - 1;
    while(pos >= 0 && !((v >> pos) & 1)){
        --pos;
    }
    return pos;
}
// Copies a set bit from src to dst.
//
// If bit `src_idx` of `src` is 1, bit `dst_idx` of `dst` is set to 1.
// If the source bit is 0, `dst` is left untouched (the destination bit is
// NOT cleared), so callers are expected to start from a zeroed destination.
//
// The one that is shifted into place has the destination's own type, so the
// shift is carried out at the destination's width: setting bit 31 of an
// unsigned 32-bit word, or any bit of a 64-bit word, is well defined.
template<typename T1, typename T2>
void SetBit(T1& src, int src_idx, T2& dst, int dst_idx){
    if((src >> src_idx) & 1)
        dst |= T2(1) << dst_idx;
}
// Copies the set bits of a `count`-bit field from src to dst.
//
// The field starts at bit `src_idx` in `src` and is written starting at bit
// `dst_idx` in `dst`, one bit at a time through SetBit, lowest bit first.
// A `count` of zero or less does nothing.
template<typename T1, typename T2>
void SetBits(T1& src, int src_idx, T2& dst, int dst_idx, int count){
    int offset = 0;
    while(offset < count){
        SetBit(src, src_idx + offset, dst, dst_idx + offset);
        ++offset;
    }
}
float fpmul(float a, float b){
int64 s1 = GetBits(a, 31, 1);
int64 s2 = GetBits(b, 31, 1);
int64 e1 = GetBits(a, 23, 8);
int64 e2 = GetBits(b, 23, 8);
int64 f1 = GetBits(a, 0, 23);
int64 f2 = GetBits(b, 0, 23);
int64 a1 = GetBits(a, 14, 9);
int64 a2 = GetBits(b, 14, 9);
int64 b1 = GetBits(a, 5, 9);
int64 b2 = GetBits(b, 5, 9);
//sum = 1 + f1 + f2 + [ (a1*a2) + (a1*b2 + a2*b1)*2^(-9) + (c1*a2 + b1*b2 + a1*c2)*2^(-18) + 0]
int64 sum = int64(1 << 23)
+ (f1 + f2)
+ (int64(a1 * a2) << (28 - 23))
+ (int64(a1*b2 + a2*b1) >> (9 - (28 - 23)));
int nHightIndx = HighestOne(sum);
uint val = 0;
SetBits(sum, nHightIndx - 23, val, 0, 23);
int e = e1 - 127 + e2 - 127 + 127 + (nHightIndx - 23);
int s = ((s1 + s2) & 1) ? (1) : (0);
SetBits(e, 0, val, 23, 8);
SetBit(s, 0, val, 31);
//val = 0x4023d702;
float v;
memcpy(&v, &val, sizeof(v));
return v;
}
// Prints a side-by-side comparison of the native float product and fpmul()
// for 2000 operand pairs, stepping both operands by 0.0003 per line.
void Test_fpmul(){
    int step = -1000;
    while(step < 1000){
        // Operands are formed in double precision and then narrowed to float.
        float lhs = -0.23 + step*0.0003;
        float rhs = 0.19 + step*0.0003;
        float approx = fpmul(lhs, rhs);
        printf("a(%e), b(%e), a*b=%e, my_fmul=%e\n", lhs, rhs, (lhs*rhs), approx);
        ++step;
    }
}
// Reinterprets the bit pattern `val` as a float (a byte-wise copy, not a
// numeric conversion).
inline float Hex2Float(uint val){
    float reinterpreted = 0;
    memcpy(&reinterpreted, &val, sizeof(val));
    return reinterpreted;
}
// Pastes the "0x" prefix onto the bare hex digits _x and passes the resulting
// literal to Hex2Float, e.g. HEX2FLOAT(3f800000) -> Hex2Float((0x3f800000)).
#define HEX2FLOAT(_x) Hex2Float((0x##_x))
inline void Verify(){
float s;
std::cout << HEX2FLOAT(3fc30f28) << " x " << HEX2FLOAT(3fc30f28) << std::endl;
std::cout << HEX2FLOAT(00000000) << std::endl;
std::cout << "--------------------------------" << std::endl;
//
//
std::cout << HEX2FLOAT(40a7a9fc) << " x " << HEX2FLOAT(40a7a9fc) << std::endl;
std::cout << HEX2FLOAT(40800000) << std::endl;
std::cout << "--------------------------------" << std::endl;
//
//
std::cout << HEX2FLOAT(4251954d) << " x " << HEX2FLOAT(4251954d) << std::endl;
std::cout << HEX2FLOAT(4014a012) << std::endl;
std::cout << "--------------------------------" << std::endl;
//
//
std::cout << HEX2FLOAT(4402fd52) << " x " << HEX2FLOAT(4402fd52) << std::endl;
std::cout << HEX2FLOAT(41db9e50) << std::endl;
std::cout << "--------------------------------" << std::endl;
//
//
std::cout << HEX2FLOAT(4251954d) << " x " << HEX2FLOAT(4402fd52) << std::endl;
std::cout << HEX2FLOAT(452b9514) << std::endl;
std::cout << "--------------------------------" << std::endl;
//
//
std::cout << HEX2FLOAT(3f07929f) << " x " << HEX2FLOAT(3f07929f) << std::endl;
std::cout << HEX2FLOAT(48860c65) << std::endl;
std::cout << "--------------------------------" << std::endl;
//
//
std::cout << HEX2FLOAT(bf07929f) << " x " << HEX2FLOAT(4402fd52) << std::endl;
std::cout << HEX2FLOAT(46d67a53) << std::endl;
std::cout << "--------------------------------" << std::endl;
//
//
std::cout << HEX2FLOAT(4380650b) << " x " << HEX2FLOAT(4380650b) << std::endl;
std::cout << HEX2FLOAT(3e8f97e9) << std::endl;
std::cout << "--------------------------------" << std::endl;
std::cout << HEX2FLOAT(3da1ab4b) << " x " << HEX2FLOAT(3da1ab4b) << std::endl;
std::cout << HEX2FLOAT(c38abd2d) << std::endl;
std::cout << "--------------------------------" << std::endl;
std::cout << HEX2FLOAT(3da1ab4b) << " x " << HEX2FLOAT(3f07929f) << std::endl;
std::cout << HEX2FLOAT(4780ca5b) << std::endl;
std::cout << "--------------------------------" << std::endl;
std::cout << HEX2FLOAT(3da1ab4b) << " x " << HEX2FLOAT(00000000) << std::endl;
std::cout << HEX2FLOAT(3bcc31b9) << std::endl;
std::cout << "--------------------------------" << std::endl;
std::cout << HEX2FLOAT(3bbab9a5) << " x " << HEX2FLOAT(3bbab9a5) << std::endl;
std::cout << HEX2FLOAT(3d2b3bca) << std::endl;
std::cout << "--------------------------------" << std::endl;
std::cout << HEX2FLOAT(3bbab9a5) << " x " << HEX2FLOAT(3da1ab4b) << std::endl;
std::cout << HEX2FLOAT(7e21ab4b) << std::endl;
std::cout << "--------------------------------" << std::endl;
std::cout << HEX2FLOAT(3bbab9a5) << " x " << HEX2FLOAT(3da1ab4b) << std::endl;
std::cout << HEX2FLOAT(3808323b) << std::endl;
std::cout << "--------------------------------" << std::endl;
std::cout << HEX2FLOAT(3bbab9a5) << " x " << HEX2FLOAT(3da1ab4b) << std::endl;
std::cout << HEX2FLOAT(39ebd749) << std::endl;
std::cout << "--------------------------------" << std::endl;
std::cout << HEX2FLOAT(3bbab9a5) << " x " << HEX2FLOAT(3da1ab4b) << std::endl;
std::cout << HEX2FLOAT(39ebd749) << std::endl;
std::cout << "--------------------------------" << std::endl;
std::cout << HEX2FLOAT(3bbab9a5) << " x " << HEX2FLOAT(3da1ab4b) << std::endl;
std::cout << HEX2FLOAT(39ebd749) << std::endl;
std::cout << "--------------------------------" << std::endl;
std::cout << HEX2FLOAT(3bbab9a5) << " x " << HEX2FLOAT(3da1ab4b) << std::endl;
std::cout << HEX2FLOAT(39ebd749) << std::endl;
std::cout << "--------------------------------" << std::endl;
std::cout << HEX2FLOAT(3bbab9a5) << " x " << HEX2FLOAT(3da1ab4b) << std::endl;
std::cout << HEX2FLOAT(39ebd749) << std::endl;
std::cout << "--------------------------------" << std::endl;
std::cout << HEX2FLOAT(3bbab9a5) << " x " << HEX2FLOAT(3da1ab4b) << std::endl;
std::cout << HEX2FLOAT(39ebd749) << std::endl;
std::cout << "--------------------------------" << std::endl;
}
// Program entry point: prints the fpmul() comparison sweep, then the fixed
// reference records. `main` must return int in standard C++ (`void main` is
// only accepted by some compilers as an extension).
int main(){
    Test_fpmul();
    Verify();
    return 0;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////// Verilog Implement ///////////////////////////
//`define _DEBUG
// Pipelined approximate single-precision multiplier.
//
// The 23-bit fraction of each operand is split into two 9-bit slices,
// a = bits [22:14] and b = bits [13:5]. The mantissa product is accumulated,
// scaled by 2^23, as
//     sum = 2^23 + f1 + f2 + (a1*a2 << 5) + ((a1*b2 + a2*b1) >> 4)
// which needs three multiplications (a1*a2, a1*b2, a2*b1).
//
// Three clocked stages:
//   1. register the operand fields,
//   2. compute the result sign, the exponent sum and the mantissa sum,
//   3. normalise the mantissa and adjust the exponent.
//
// No special handling exists for zero, denormal, infinite or NaN operands,
// and the 9-bit exponent sum is truncated to 8 bits in stage 3.
//
// Define _DEBUG to expose the internal pipeline registers as extra outputs.
module my_fpmul(clk, rst_n, dataa, datab, result
`ifdef _DEBUG
    ,_a1 ,_b1 ,_a2 ,_b2, _e1, _e2
    ,_ss
    ,_sum
    , _se
    ,_sf1f2
    ,_sa1a2
    ,_sa1b2
    ,_sa2b1
`endif
);
    input clk;
    input rst_n;            // asynchronous reset, active low
    input [31:0] dataa;
    input [31:0] datab;
    output [31:0] result;
`ifdef _DEBUG
    output [8:0]_a1;
    output [8:0]_b1;
    output [8:0]_a2;
    output [8:0]_b2;
    output [8:0]_e1;
    output [8:0]_e2;
    output _ss;
    output [26:0]_sum;
    output [8:0]_se;
    output [24:0]_sf1f2;
    output [22:0]_sa1a2;
    output [18:0]_sa1b2;
    output [18:0]_sa2b1;
`endif

    //clk 1
    reg [31:0]datac = 32'h0;    // only ever written by the stage-1 reset branch
    reg s1 = 1'b0;
    reg s2 = 1'b0;
    reg [8:0]e1 = 9'b0;
    reg [8:0]e2 = 9'b0;
    reg [22:0]f1 = 23'b0;
    reg [22:0]f2 = 23'b0;
    // The 9-bit slices are held zero-extended to 18 bits so that each
    // product is formed by an 18 x 18 multiplication.
    reg [17:0]a1 = 18'b0;
    reg [17:0]a2 = 18'b0;
    reg [17:0]b1 = 18'b0;
    reg [17:0]b2 = 18'b0;

    //clk 2
    reg ss = 1'b0;
    reg [24:0]sf1f2 = 25'b0;
    reg [22:0]sa1a2 = 23'b0;
    reg [18:0]sa1b2 = 19'b0;
    reg [18:0]sa2b1 = 19'b0;
    reg [26:0]sum = 27'b0;
    reg [8:0]se = 9'b0;

    //clk 3
    reg sss = 1'b0;
    reg [7:0]sse = 8'b0;
    reg [22:0]ssum = 23'b0;

    // pipeline step 1: latch sign, exponent, fraction and the fraction slices
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) datac <= 32'h0;
        else begin
            s1 <= dataa[31:31];
            s2 <= datab[31:31];
            e1 <= dataa[30:23];
            e2 <= datab[30:23];
            f1 <= dataa[22:0];
            f2 <= datab[22:0];
            a1 <= {9'b0, dataa[22:14]};
            a2 <= {9'b0, datab[22:14]};
            b1 <= {9'b0, dataa[13:5]};
            b2 <= {9'b0, datab[13:5]};
        end
    end

    // pipeline step 2: sign, exponent sum and mantissa sum
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            ss <= 1'b0;
            se <= 9'b0;
            sum <= 27'b0;
        end else begin
            ss <= s1^s2;
            se <= e1 + e2 - 8'd127;
            // The four partial terms use blocking assignments so that `sum`
            // below is computed from the values of this same clock edge;
            // non-blocking assignments here would add one clock of latency.
            sf1f2 = (24'b1 << 23) + (f1 + f2);
            sa1a2 = {a1*a2,5'b0};
            sa1b2 = a1*b2;
            sa2b1 = a2*b1;
            sum <= sf1f2 + sa1a2 + ((sa1b2 + sa2b1)>>3'd4);
            //sum <= (26'b1 << 23) + (f1 + f2) + {a1*a2,5'b0} + {{9'b0, a1}*{9'b0, a2}, 5'b0} + {9'b0, a1}*{9'b0, b2} + {9'b0, a2}*{9'b0, b1};
        end
    end

    // pipeline step 3: normalise - pick the 23 bits below the leading one
    // and raise the exponent by the amount the mantissa was shifted
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            sss <= 1'b0;
            sse <= 8'b0;
            ssum <= 23'b0;
        end else begin
            sss <= ss;
            if (sum[25]) begin
                sse <= se + 2'd2;
                ssum <= sum[24:2];
            end else if (sum[24]) begin
                sse <= se + 2'd1;
                ssum <= sum[23:1];
            end else begin
                sse <= se;
                ssum <= sum[22:0];
            end
        end
    end

    assign result = {sss, sse, ssum};

`ifdef _DEBUG
    assign _e1 = e1;
    assign _e2 = e2;
    assign _a1 = a1;
    assign _b1 = b1;
    assign _a2 = a2;
    assign _b2 = b2;
    assign _ss = ss;
    assign _sum = sum;
    assign _se = se;
    assign _sf1f2 = sf1f2;
    assign _sa1a2 = sa1a2;
    assign _sa1b2 = sa1b2;
    assign _sa2b1 = sa2b1;
`endif
endmodule
//////////////////////////////////////////////////////////////////////////////
/////////////////////////////Test Bench////////////////////////////////////
//`define _DEBUG
`timescale 1 ns/ 1 ps
// Test bench for my_fpmul.
//
// Drives a 20 ns clock (toggle every 10 ns), keeps reset de-asserted, applies
// a new operand pair every 20 ns starting at t = 5 ns, and prints the
// operands and the result word once per 20 ns for 30 iterations.
module my_fpmul_vlg_tst();
    // constants
    // general purpose registers
    reg eachvec;
    // test vector input registers
    reg clk;
    reg [31:0] dataa;
    reg [31:0] datab;
    reg rst_n;
    // wires
    wire [31:0] result;
`ifdef _DEBUG
    wire [8:0] _a1, _b1, _a2, _b2;
    wire[8:0]_e1;
    wire[8:0]_e2;
    wire _ss;
    wire [26:0]_sum;
    wire [8:0]_se;
    wire [24:0]_sf1f2;
    wire [22:0]_sa1a2;
    wire [18:0]_sa1b2;
    wire [18:0]_sa2b1;
`endif

    // assign statements (if any)
    my_fpmul i1 (
        // port map - connection between master ports and signals/registers
        .clk(clk),
        .rst_n(rst_n),
        .dataa(dataa),
        .datab(datab),
        .result(result)
`ifdef _DEBUG
        ,._a1(_a1) , ._b1(_b1) , ._a2(_a2) , ._b2(_b2), ._e1(_e1), ._e2(_e2)
        ,._ss(_ss) , ._sum(_sum) , ._se(_se)
        ,._sf1f2(_sf1f2)
        ,._sa1a2(_sa1a2)
        ,._sa1b2(_sa1b2)
        ,._sa2b1(_sa2b1)
`endif
    );

    // clock generator; reset is held inactive for the whole run
    initial begin
        rst_n = 1;
        clk = 0;
        forever #10 clk = ~clk;
    end

    // monitor: 7 + 5 + 8 = 20 ns per iteration, i.e. one print per clock period
    initial begin
        repeat(30)
        begin
            #7
            $display("%x * %x : ", dataa, datab);
            #5
            $display("%x", result);
            #8
            $display("\n");
        end
    end

    // stimulus
    initial begin
        //dataa<=32'h3bbab9a5; //0.0056984
        //datab<=32'h3bbab9a5; //0.0056984
        dataa <= 32'b0;
        datab <= 32'b0;
        #5;
        dataa<=32'h3fc30f28; //1.5239
        datab<=32'h3fc30f28;
        #20;
        dataa<=32'h40a7a9fc; //5.2395
        datab<=32'h40a7a9fc;
        #20;
        dataa<=32'h4251954d; //52.3958
        datab<=32'h4251954d;
        #20;
        dataa<=32'h4402fd52; //523.9581
        datab<=32'h4402fd52;
        #20;
        dataa<=32'h4251954d; //52.3958
        datab<=32'h4402fd52; //523.9581
        #20;
        dataa<=32'h3f07929f; //0.529581
        datab<=32'h3f07929f;
        #20;
        dataa<=32'hbf07929f; //-0.529581
        datab<=32'h4402fd52; //523.9581
        #20;
        dataa<=32'h4380650b; //256.7894
        datab<=32'h4380650b;
        #20;
        dataa<=32'h3da1ab4b; //0.07894
        datab<=32'h3da1ab4b;
        #20;
        dataa<=32'h3da1ab4b; //0.07894
        datab<=32'h3f07929f; //0.529581
        #20;
        dataa<=32'h3da1ab4b; //0.07894
        datab<=32'h0; //0.0
        #20;
        dataa<=32'h3bbab9a5; //0.0056984
        datab<=32'h3bbab9a5; //0.0056984
        #20;
        dataa<=32'h3bbab9a5; //0.0056984
        datab<=32'h3da1ab4b; //0.07894
        #2000
        $stop;
        //$finish;
    end
endmodule
//////////////////////////////////////////////////////////////////////////////////////////////