This repository was archived by the owner on Dec 1, 2021. It is now read-only.
This repository was archived by the owner on Dec 1, 2021. It is now read-only.
go BLAS #17
Open
Description
Hi,
I am trying to use this project to convert code that calls OpenBLAS into Go assembly.
Here is a simple C example:
// degmm.c
#include <cblas.h>
/*
 * degmm - run one fixed DGEMM through CBLAS.
 *
 * Calls cblas_dgemm in column-major order with A not transposed and B
 * transposed, using M = N = 3, K = 2, alpha = 1 and beta = 2, i.e. it
 * requests C = 1 * A * B^T + 2 * C. A and B hold 6 doubles each and C
 * holds 9; all three are passed with a leading dimension of 3.
 *
 * Takes no arguments and returns nothing. The result is written into the
 * local array C and is not used afterwards.
 */
void degmm(void)
{
    double A[6] = {1.0, 2.0, 1.0, -3.0, 4.0, -1.0};
    double B[6] = {1.0, 2.0, 1.0, -3.0, 4.0, -1.0};
    double C[9] = {.5, .5, .5, .5, .5, .5, .5, .5, .5};

    cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,
                3, 3, 2,    /* M, N, K */
                1, A, 3,    /* alpha, A, lda */
                B, 3,       /* B, ldb */
                2, C, 3);   /* beta, C, ldc */
}
/* Program entry point: runs the single DGEMM example, then exits with status 0. */
int main()
{
    degmm();
    return 0;
}
Use the following command:
clang-6.0 -O3 -mavx -mfma -masm=intel -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -S degmm.c
This produces the following assembly code (degmm.s):
.text
.intel_syntax noprefix
.file "degmm.c"
# ---------------------------------------------------------------------
# degmm -- compiler output for `void degmm()` in degmm.c (Intel syntax).
# Builds the arrays A, B and C on the stack and calls cblas_dgemm once.
#
# Stack frame after `sub rsp, 168` (offsets relative to rsp before the
# argument pushes):
#   [rsp +  0 .. rsp +  47]  B = 1, 2, 1, -3, 4, -1
#   [rsp + 48 .. rsp +  95]  A = 1, 2, 1, -3, 4, -1
#   [rsp + 96 .. rsp + 167]  C = 72-byte copy of .Ldegmm.C
# ---------------------------------------------------------------------
.section .rodata.cst32,"aM",@progbits,32
.p2align 5 # -- Begin function degmm
# First four elements of A and B, loaded below as one 32-byte vector.
.LCPI0_0:
.quad 4607182418800017408 # double 1
.quad 4611686018427387904 # double 2
.quad 4607182418800017408 # double 1
.quad -4609434218613702656 # double -3
.section .rodata.cst8,"aM",@progbits,8
.p2align 3
# Scalar loaded into xmm0 (alpha in the C call).
.LCPI0_1:
.quad 4607182418800017408 # double 1
# Scalar loaded into xmm1 (beta in the C call).
.LCPI0_2:
.quad 4611686018427387904 # double 2
.text
.globl degmm
.p2align 4, 0x90
.type degmm,@function
degmm: # @degmm
# %bb.0:
# Reserve 168 bytes of locals: 48 (B) + 48 (A) + 72 (C).
# NOTE(review): the c2goasm panic message quotes exactly this instruction
# text ("sub rsp, 168"); main contains the same line.
sub rsp, 168
# A[0..3] = {1, 2, 1, -3}
vmovaps ymm0, ymmword ptr [rip + .LCPI0_0] # ymm0 = [1.000000e+00,2.000000e+00,1.000000e+00,-3.000000e+00]
vmovups ymmword ptr [rsp + 48], ymm0
# rax = bit pattern of double 4.0 -> A[4]
movabs rax, 4616189618054758400
mov qword ptr [rsp + 80], rax
# rcx = bit pattern of double -1.0 -> A[5]
movabs rcx, -4616189618054758400
mov qword ptr [rsp + 88], rcx
# B gets the same six values, reusing ymm0, rax and rcx.
vmovups ymmword ptr [rsp], ymm0
mov qword ptr [rsp + 32], rax
mov qword ptr [rsp + 40], rcx
# Copy the 72-byte initializer .Ldegmm.C into C: last 8 bytes first,
# then the two 32-byte halves.
mov rax, qword ptr [rip + .Ldegmm.C+64]
mov qword ptr [rsp + 160], rax
vmovups ymm0, ymmword ptr [rip + .Ldegmm.C+32]
vmovups ymmword ptr [rsp + 128], ymm0
vmovups ymm0, ymmword ptr [rip + .Ldegmm.C]
vmovups ymmword ptr [rsp + 96], ymm0
# Capture the array addresses before the pushes below move rsp:
# rax = &C, r10 = &B, r11 = &A.
lea rax, [rsp + 96]
mov r10, rsp
lea r11, [rsp + 48]
# Floating-point arguments: xmm0 = alpha (1.0), xmm1 = beta (2.0).
vmovsd xmm0, qword ptr [rip + .LCPI0_1] # xmm0 = mem[0],zero
vmovsd xmm1, qword ptr [rip + .LCPI0_2] # xmm1 = mem[0],zero
# Integer register arguments, in the order of the C call:
# edi = CblasColMajor, esi = CblasNoTrans, edx = CblasTrans,
# ecx = M (3), r8d = N (3), r9d = K (2).
mov edi, 102
mov esi, 111
mov edx, 112
mov ecx, 3
mov r8d, 3
mov r9d, 2
# Remaining arguments go on the stack, last one pushed first:
# ldc, C, ldb, B, lda, A (6 * 8 = 48 bytes).
push 3
push rax
push 3
push r10
push 3
push r11
# Clear the upper halves of the ymm registers before the external call.
vzeroupper
call cblas_dgemm
# Release the 48 bytes of pushed arguments and the 168 bytes of locals
# in a single adjustment (48 + 168 = 216).
add rsp, 216
ret
.Lfunc_end0:
.size degmm, .Lfunc_end0-degmm
# -- End function
# ---------------------------------------------------------------------
# main -- compiler output for `int main()` in degmm.c (Intel syntax).
# It does not call degmm: the body below repeats degmm's instruction
# sequence with its own constant-pool labels (.LCPI1_*), then returns 0.
#
# Stack frame after `sub rsp, 168` (offsets relative to rsp before the
# argument pushes):
#   [rsp +  0 .. rsp +  47]  B = 1, 2, 1, -3, 4, -1
#   [rsp + 48 .. rsp +  95]  A = 1, 2, 1, -3, 4, -1
#   [rsp + 96 .. rsp + 167]  C = 72-byte copy of .Ldegmm.C
# ---------------------------------------------------------------------
.section .rodata.cst32,"aM",@progbits,32
.p2align 5 # -- Begin function main
# First four elements of A and B, loaded below as one 32-byte vector.
.LCPI1_0:
.quad 4607182418800017408 # double 1
.quad 4611686018427387904 # double 2
.quad 4607182418800017408 # double 1
.quad -4609434218613702656 # double -3
.section .rodata.cst8,"aM",@progbits,8
.p2align 3
# Scalar loaded into xmm0 (alpha in the C call).
.LCPI1_1:
.quad 4607182418800017408 # double 1
# Scalar loaded into xmm1 (beta in the C call).
.LCPI1_2:
.quad 4611686018427387904 # double 2
.text
.globl main
.p2align 4, 0x90
.type main,@function
main: # @main
# %bb.0:
# Reserve 168 bytes of locals: 48 (B) + 48 (A) + 72 (C).
sub rsp, 168
# A[0..3] = {1, 2, 1, -3}
vmovaps ymm0, ymmword ptr [rip + .LCPI1_0] # ymm0 = [1.000000e+00,2.000000e+00,1.000000e+00,-3.000000e+00]
vmovups ymmword ptr [rsp + 48], ymm0
# rax = bit pattern of double 4.0 -> A[4]
movabs rax, 4616189618054758400
mov qword ptr [rsp + 80], rax
# rcx = bit pattern of double -1.0 -> A[5]
movabs rcx, -4616189618054758400
mov qword ptr [rsp + 88], rcx
# B gets the same six values, reusing ymm0, rax and rcx.
vmovups ymmword ptr [rsp], ymm0
mov qword ptr [rsp + 32], rax
mov qword ptr [rsp + 40], rcx
# Copy the 72-byte initializer .Ldegmm.C into C: last 8 bytes first,
# then the two 32-byte halves.
mov rax, qword ptr [rip + .Ldegmm.C+64]
mov qword ptr [rsp + 160], rax
vmovups ymm0, ymmword ptr [rip + .Ldegmm.C+32]
vmovups ymmword ptr [rsp + 128], ymm0
vmovups ymm0, ymmword ptr [rip + .Ldegmm.C]
vmovups ymmword ptr [rsp + 96], ymm0
# Capture the array addresses before the pushes below move rsp:
# rax = &C, r10 = &B, r11 = &A.
lea rax, [rsp + 96]
mov r10, rsp
lea r11, [rsp + 48]
# Floating-point arguments: xmm0 = alpha (1.0), xmm1 = beta (2.0).
vmovsd xmm0, qword ptr [rip + .LCPI1_1] # xmm0 = mem[0],zero
vmovsd xmm1, qword ptr [rip + .LCPI1_2] # xmm1 = mem[0],zero
# Integer register arguments, in the order of the C call:
# edi = CblasColMajor, esi = CblasNoTrans, edx = CblasTrans,
# ecx = M (3), r8d = N (3), r9d = K (2).
mov edi, 102
mov esi, 111
mov edx, 112
mov ecx, 3
mov r8d, 3
mov r9d, 2
# Remaining arguments go on the stack, last one pushed first:
# ldc, C, ldb, B, lda, A (6 * 8 = 48 bytes).
push 3
push rax
push 3
push r10
push 3
push r11
# Clear the upper halves of the ymm registers before the external call.
vzeroupper
call cblas_dgemm
# Drop the 48 bytes of pushed arguments.
add rsp, 48
# Return value of main: 0.
xor eax, eax
# Release the 168 bytes of locals.
add rsp, 168
ret
.Lfunc_end1:
.size main, .Lfunc_end1-main
# -- End function
# .Ldegmm.C -- read-only initializer for the local array C in degmm.c:
# nine doubles of 0.5 (9 * 8 = 72 bytes). Both degmm and main copy it
# onto their stack frames before calling cblas_dgemm.
.type .Ldegmm.C,@object # @degmm.C
.section .rodata,"a",@progbits
.p2align 4
.Ldegmm.C:
.quad 4602678819172646912 # double 0.5
.quad 4602678819172646912 # double 0.5
.quad 4602678819172646912 # double 0.5
.quad 4602678819172646912 # double 0.5
.quad 4602678819172646912 # double 0.5
.quad 4602678819172646912 # double 0.5
.quad 4602678819172646912 # double 0.5
.quad 4602678819172646912 # double 0.5
.quad 4602678819172646912 # double 0.5
.size .Ldegmm.C, 72
.ident "clang version 6.0.0-1ubuntu2 (tags/RELEASE_600/final)"
.section ".note.GNU-stack","",@progbits
I then convert it to Go assembly:
c2goasm -a degmm.s Degmm.s
c2goasm reports the following error:
Processing cpp/degmm.s
panic: 'sub rsp' found but in unexpected scenario: sub rsp, 168
goroutine 1 [running]:
main.(*Epilogue).isPrologueInstruction(0xc0000575b8, 0xc0000171a0, 0xd, 0xd)
/home/bzhou/go/src/github.com/minio/c2goasm/epilogue.go:205 +0x3d4
main.getPrologueLines(0xc0000d0150, 0x26, 0xeb, 0xc0000575b8, 0x0)
/home/bzhou/go/src/github.com/minio/c2goasm/subroutine.go:231 +0xb4
main.(*Subroutine).removePrologueLines(0xc000057590, 0xc0000d0000, 0x8d, 0x100, 0x15, 0x3b)
/home/bzhou/go/src/github.com/minio/c2goasm/subroutine.go:134 +0x87
main.extractSubroutine(0x3a, 0xc0000d0000, 0x8d, 0x100, 0x11, 0xc000017188, 0x5, 0x14, 0x0, 0x0, ...)
/home/bzhou/go/src/github.com/minio/c2goasm/subroutine.go:127 +0x2cd
main.segmentSource(0xc0000d0000, 0x8d, 0x100, 0x20, 0xf, 0xc)
/home/bzhou/go/src/github.com/minio/c2goasm/subroutine.go:85 +0x20e
main.process(0xc0000d0000, 0x8d, 0x100, 0xc000017058, 0x8, 0x0, 0x0, 0x0, 0x0, 0x0)
/home/bzhou/go/src/github.com/minio/c2goasm/c2goasm.go:78 +0x80
main.main()
/home/bzhou/go/src/github.com/minio/c2goasm/c2goasm.go:264 +0x37e
Any suggestions?
Metadata
Metadata
Assignees
Labels
No labels