Skip to content

Commit c275784

Browse files
author
nvidia
committed
add jetson support
1 parent 106b4a9 commit c275784

File tree

9 files changed

+571
-12
lines changed

9 files changed

+571
-12
lines changed

Makefile

+7-2
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,17 @@ out:
1111
mkdir out
1212

1313
test:
14-
${PYTHON_BIN} tests/test.py
14+
# ${PYTHON_BIN} tests/test.py
1515
${PYTHON_BIN} tests/test-with-multiprocessing.py
1616

17-
pynvjpeg:
17+
pynvjpeg: build/lib/libcolor_space.a
18+
# rm build/lib.linux-aarch64-3.6/nvjpeg.cpython-36m-aarch64-linux-gnu.so -f
1819
${PYTHON_BIN} setup.py build
1920

21+
build/lib/libcolor_space.a: src/jetson/Utils/ColorSpace.cu
22+
mkdir -p build/lib
23+
nvcc -DCUDNN --compiler-options "-fPIC -lstdc++ -pthread -lm" -c src/jetson/Utils/ColorSpace.cu -o build/lib/libcolor_space.a
24+
2025
clean:
2126
rm -Rf out build dist pynvjpeg.egg-info
2227

include/JpegCoder.hpp

+8-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
#include <iostream>
77
#include <exception>
88

9-
class JpegCoderError: std::runtime_error{
9+
class JpegCoderError: public std::runtime_error{
1010
protected:
1111
int _code;
1212
public:
@@ -63,6 +63,11 @@ class JpegCoderBytes{
6363
this->size = size;
6464
}
6565

66+
JpegCoderBytes(unsigned char* data, size_t size){
67+
this->data = data;
68+
this->size = size;
69+
}
70+
6671
~JpegCoderBytes(){
6772
if(this->data!=nullptr){
6873
free(this->data);
@@ -73,9 +78,11 @@ class JpegCoderBytes{
7378
class JpegCoder{
7479
protected:
7580
static void* _global_context;
81+
void* _local_context;
7682
public:
7783
JpegCoder();
7884
~JpegCoder();
85+
void ensureThread(long threadIdent);
7986
JpegCoderImage* decode(const unsigned char* jpegData, size_t length);
8087
JpegCoderBytes* encode(JpegCoderImage* img, int quality);
8188
static void cleanUpEnv();

nvjpeg-python.cpp

+11-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
#include <stdio.h>
2-
#include <nvjpeg.h>
32
#include <malloc.h>
43
#include <sys/stat.h>
54
#include <sys/types.h>
65

76
#include <Python.h>
7+
#include <pythread.h>
88
#include <structmember.h>
99
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
1010
#include <numpy/arrayobject.h>
@@ -55,7 +55,14 @@ static PyObject* NvJpeg_decode(NvJpeg* Self, PyObject* Argvs)
5555
PyErr_SetString(PyExc_ValueError, "Parse the argument FAILED! You should jpegData byte string!");
5656
return NULL;
5757
}
58-
auto img = m_handle->decode((const unsigned char*)jpegData, len);
58+
JpegCoderImage* img;
59+
try{
60+
m_handle->ensureThread(PyThread_get_thread_ident());
61+
img = m_handle->decode((const unsigned char*)jpegData, len);
62+
}catch(JpegCoderError e){
63+
PyErr_Format(PyExc_ValueError, "%s, Code: %d", e.what(), e.code());
64+
return NULL;
65+
}
5966

6067
unsigned char* data = img->buffer();
6168

@@ -101,6 +108,7 @@ static PyObject* NvJpeg_encode(NvJpeg* Self, PyObject* Argvs)
101108
img->fill(buffer);
102109
Py_DECREF(bytes);
103110

111+
m_handle->ensureThread(PyThread_get_thread_ident());
104112
auto data = m_handle->encode(img, quality);
105113

106114
PyObject* rtn = PyBytes_FromStringAndSize((const char*)data->data, data->size);
@@ -145,6 +153,7 @@ static PyObject* NvJpeg_read(NvJpeg* Self, PyObject* Argvs)
145153

146154
fclose(fp);
147155

156+
m_handle->ensureThread(PyThread_get_thread_ident());
148157
auto img = m_handle->decode((const unsigned char*)jpegData, dataLength);
149158

150159
free(jpegData);

setup.py

+26-3
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,43 @@
11
#!/usr/bin/env python
22
import sys
33
import os
4+
import platform
45
import glob
56
from setuptools import setup, find_packages, Extension
7+
import numpy
68

79
with open("README.md", "r", encoding="utf-8") as fh:
810
long_description = fh.read()
911

1012
from distutils.core import setup, Extension
1113

12-
13-
extension_nvjpeg = Extension('nvjpeg', ['nvjpeg-python.cpp', 'src/x86/JpegCoder.cpp'], ['include'], [('JPEGCODER_ARCH', 'x86')])
14+
if platform.system() == 'Linux':
15+
if os.path.exists('/usr/src/jetson_multimedia_api'):
16+
# Jetson
17+
extension_nvjpeg = Extension('nvjpeg',
18+
[
19+
'nvjpeg-python.cpp', 'src/jetson/JpegCoder.cpp',
20+
'/usr/src/jetson_multimedia_api/samples/common/classes/NvJpegDecoder.cpp', '/usr/src/jetson_multimedia_api/samples/common/classes/NvJpegEncoder.cpp',
21+
'/usr/src/jetson_multimedia_api/samples/common/classes/NvBuffer.cpp', '/usr/src/jetson_multimedia_api/samples/common/classes/NvElement.cpp',
22+
'/usr/src/jetson_multimedia_api/samples/common/classes/NvLogging.cpp', '/usr/src/jetson_multimedia_api/samples/common/classes/NvElementProfiler.cpp',
23+
'/usr/src/jetson_multimedia_api/argus/samples/utils/CUDAHelper.cpp'
24+
],
25+
['include', '/usr/src/jetson_multimedia_api/argus/samples/utils', '/usr/src/jetson_multimedia_api/include', '/usr/src/jetson_multimedia_api/include/libjpeg-8b', numpy.get_include()],
26+
[('JPEGCODER_ARCH', 'jetson')],
27+
library_dirs=['/usr/lib/aarch64-linux-gnu/tegra', 'build/lib'],
28+
libraries=['color_space', 'cudart', 'nvjpeg', 'cuda']
29+
)
30+
else:
31+
# x86 or x86_64
32+
extension_nvjpeg = Extension('nvjpeg',
33+
['nvjpeg-python.cpp', 'src/x86/JpegCoder.cpp'],
34+
['include', numpy.get_include()],
35+
[('JPEGCODER_ARCH', 'x86')]
36+
)
1437

1538

1639
setup(name='pynvjpeg',
17-
version='0.0.10',
40+
version='0.0.11',
1841
ext_modules=[extension_nvjpeg],
1942
author="Usingnet",
2043
author_email="developer@usingnet.com",

src/jetson/JpegCoder.cpp

+213
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
#include <JpegCoder.hpp>
2+
#include <NvJpegDecoder.h>
3+
#include <NvJpegEncoder.h>
4+
#include "Utils/ColorSpace.hpp"
5+
#include <cuda.h>
6+
#include <cuda_runtime.h>
7+
#include <CUDAHelper.h>
8+
#include <map>
9+
10+
#ifndef NVJPEG_MAX_COMPONENT
11+
#define NVJPEG_MAX_COMPONENT 4
12+
#endif
13+
14+
void* JpegCoder::_global_context = nullptr;
15+
16+
typedef struct
17+
{
18+
NvJPEGEncoder* nv_encoder;
19+
NvJPEGDecoder* nv_decoder;
20+
std::map<long, CUcontext*>* cudaContextMap;
21+
}NvJpegGlobalContext;
22+
23+
#define JPEGCODER_GLOBAL_CONTEXT ((NvJpegGlobalContext*)(JpegCoder::_global_context))
24+
25+
// typedef struct
26+
// {
27+
// }NvJpegLocalContext;
28+
29+
// #define JPEGCODER_LOCAL_CONTEXT ((NvJpegLocalContext*)(this->_local_context))
30+
31+
JpegCoderImage::JpegCoderImage(size_t width, size_t height, short nChannel, JpegCoderChromaSubsampling subsampling){
32+
this->img = malloc(width * height * nChannel);
33+
this->height = height;
34+
this->width = width;
35+
this->nChannel = nChannel;
36+
this->subsampling = subsampling;
37+
}
38+
39+
void JpegCoderImage::fill(const unsigned char* data){
40+
memcpy(img, data, width*height*nChannel);
41+
this->subsampling = JPEGCODER_CSS_444;
42+
}
43+
44+
unsigned char* JpegCoderImage::buffer(){
45+
void* rtn = malloc(width * height * nChannel);
46+
memcpy(rtn, img, width * height * nChannel);
47+
return (unsigned char*)rtn;
48+
}
49+
50+
JpegCoderImage::~JpegCoderImage(){
51+
free(this->img);
52+
this->img = nullptr;
53+
}
54+
55+
56+
JpegCoder::JpegCoder(){
57+
if(JpegCoder::_global_context == nullptr){
58+
JpegCoder::_global_context = malloc(sizeof(NvJpegGlobalContext));
59+
JPEGCODER_GLOBAL_CONTEXT->cudaContextMap = new std::map<long, CUcontext*>();
60+
JPEGCODER_GLOBAL_CONTEXT->nv_decoder = NvJPEGDecoder::createJPEGDecoder("nvjpeg-python:decoder");
61+
JPEGCODER_GLOBAL_CONTEXT->nv_encoder = NvJPEGEncoder::createJPEGEncoder("nvjpeg-python:encoder");
62+
}
63+
}
64+
65+
JpegCoder::~JpegCoder(){
66+
// ArgusSamples::cleanupCUDA((CUcontext*)_local_context);
67+
// free(_local_context);
68+
_local_context = nullptr;
69+
}
70+
71+
void JpegCoder::cleanUpEnv(){
72+
if(JpegCoder::_global_context != nullptr) {
73+
delete(JPEGCODER_GLOBAL_CONTEXT->nv_decoder);
74+
delete(JPEGCODER_GLOBAL_CONTEXT->nv_encoder);
75+
for(auto cudaContext: *(JPEGCODER_GLOBAL_CONTEXT->cudaContextMap)){
76+
ArgusSamples::cleanupCUDA(cudaContext.second);
77+
}
78+
delete(JPEGCODER_GLOBAL_CONTEXT->cudaContextMap);
79+
// ArgusSamples::cleanupCUDA(&(JPEGCODER_GLOBAL_CONTEXT->g_cudaContext));
80+
free(JpegCoder::_global_context);
81+
JpegCoder::_global_context = nullptr;
82+
}
83+
}
84+
85+
void JpegCoder::ensureThread(long threadIdent){
86+
// printf("threadIdent Id: %ld\n", threadIdent);
87+
if(JPEGCODER_GLOBAL_CONTEXT->cudaContextMap->count(threadIdent) == 0){
88+
CUcontext* context = (CUcontext*)malloc(sizeof(CUcontext));
89+
ArgusSamples::initCUDA(context);
90+
(*JPEGCODER_GLOBAL_CONTEXT->cudaContextMap)[threadIdent] = context;
91+
}
92+
}
93+
94+
JpegCoderImage* JpegCoder::decode(const unsigned char* jpegData, size_t length){
95+
NvJPEGDecoder* nv_decoder = JPEGCODER_GLOBAL_CONTEXT->nv_decoder;
96+
97+
uint32_t pixfmt, width, height;
98+
NvBuffer* buffer;
99+
int nReturnCode = nv_decoder->decodeToBuffer(&buffer, (unsigned char*)jpegData, length, &pixfmt, &width, &height);
100+
if (nReturnCode != 0){
101+
throw JpegCoderError(nReturnCode, "NvJpeg Decoder Error");
102+
}
103+
JpegCoderChromaSubsampling subsampling;
104+
switch (pixfmt)
105+
{
106+
case V4L2_PIX_FMT_YUV420M:
107+
subsampling = JPEGCODER_CSS_420;
108+
break;
109+
case V4L2_PIX_FMT_YUV444M:
110+
subsampling = JPEGCODER_CSS_444;
111+
break;
112+
default:
113+
throw JpegCoderError(pixfmt, "Unknown pixfmt");
114+
}
115+
JpegCoderImage* imgdesc = new JpegCoderImage(width, height, NVJPEG_MAX_COMPONENT, subsampling);
116+
char* img_data = (char*)imgdesc->img;
117+
int frameSize = 0;
118+
for (u_int32_t i = 0; i < buffer->n_planes; i++)
119+
{
120+
NvBuffer::NvBufferPlane &plane = buffer->planes[i];
121+
size_t bytes_to_write =
122+
plane.fmt.bytesperpixel * plane.fmt.width * plane.fmt.height;
123+
memcpy(img_data, plane.data, bytes_to_write);
124+
img_data += bytes_to_write;
125+
frameSize += bytes_to_write;
126+
}
127+
CUdeviceptr dpFrame = 0, nv12Frame = 0;
128+
CUresult error_code = cuMemAlloc(&dpFrame, width * height * 4);
129+
if(error_code != CUDA_SUCCESS){
130+
throw JpegCoderError(error_code, "cuMemAlloc Error");
131+
}
132+
error_code = cuMemAlloc(&nv12Frame, frameSize);
133+
if(error_code != CUDA_SUCCESS){
134+
throw JpegCoderError(error_code, "cuMemAlloc Error");
135+
}
136+
cudaError_t eCopy = cudaMemcpy((void*)nv12Frame, imgdesc->img, frameSize, cudaMemcpyHostToDevice);
137+
if(eCopy != cudaSuccess){
138+
throw JpegCoderError(error_code, cudaGetErrorString(eCopy));
139+
}
140+
switch(subsampling){
141+
case JPEGCODER_CSS_420:
142+
YUV420ToColor32<BGRA32>((uint8_t*)nv12Frame, width, (uint8_t *)dpFrame, 4 * width, width, height);
143+
break;
144+
case JPEGCODER_CSS_444:
145+
YUV444ToColor32<BGRA32>((uint8_t*)nv12Frame, width, (uint8_t *)dpFrame, 4 * width, width, height);
146+
break;
147+
default:
148+
throw JpegCoderError(pixfmt, "Unknown pixfmt");
149+
}
150+
int output_size = width * height * 4;
151+
eCopy = cudaMemcpy(imgdesc->img, (uint8_t*)dpFrame, output_size, cudaMemcpyDeviceToHost);
152+
if(eCopy != cudaSuccess){
153+
throw JpegCoderError(error_code, cudaGetErrorString(eCopy));
154+
}
155+
cuMemFree(dpFrame);
156+
cuMemFree(nv12Frame);
157+
delete(buffer);
158+
return imgdesc;
159+
}
160+
161+
JpegCoderBytes* JpegCoder::encode(JpegCoderImage* img, int quality){
162+
NvJPEGEncoder *nv_encodere = JPEGCODER_GLOBAL_CONTEXT->nv_encoder;
163+
164+
NvBuffer buffer(V4L2_PIX_FMT_YUV420M, img->width, img->height, 0);
165+
buffer.allocateMemory();
166+
167+
168+
CUdeviceptr bgrFrame = 0, yuvFrame = 0;
169+
CUresult error_code = cuMemAlloc(&bgrFrame, img->width * img->height * 3);
170+
if(error_code != CUDA_SUCCESS){
171+
throw JpegCoderError(error_code, "cuMemAlloc Error");
172+
}
173+
size_t yuvframeSize = img->width*img->height + ((int)(img->width/2) * (int)(img->height/2))*2;
174+
char* yuv_data = (char*)malloc(yuvframeSize);
175+
error_code = cuMemAlloc(&yuvFrame, yuvframeSize);
176+
if(error_code != CUDA_SUCCESS){
177+
throw JpegCoderError(error_code, "cuMemAlloc Error");
178+
}
179+
cudaError_t eCopy = cudaMemcpy((void*)bgrFrame, img->img, img->width * img->height * 3, cudaMemcpyHostToDevice);
180+
if(eCopy != cudaSuccess){
181+
throw JpegCoderError(error_code, cudaGetErrorString(eCopy));
182+
}
183+
BGRToYUV420((uint8_t*)bgrFrame, (uint8_t*)yuvFrame, img->width, img->height);
184+
eCopy = cudaMemcpy(yuv_data, (uint8_t*)yuvFrame, yuvframeSize, cudaMemcpyDeviceToHost);
185+
if(eCopy != cudaSuccess){
186+
throw JpegCoderError(error_code, cudaGetErrorString(eCopy));
187+
}
188+
189+
char* img_data = yuv_data;
190+
for (uint32_t i = 0; i < buffer.n_planes; i++)
191+
{
192+
NvBuffer::NvBufferPlane &plane = buffer.planes[i];
193+
char* data = (char *) plane.data;
194+
plane.bytesused = plane.fmt.stride * plane.fmt.height;
195+
memcpy(data, img_data, plane.bytesused);
196+
img_data+=plane.bytesused;
197+
}
198+
free(yuv_data);
199+
cuMemFree(bgrFrame);
200+
cuMemFree(yuvFrame);
201+
202+
203+
unsigned long out_buf_size = img->width * img->height * 3 / 2;
204+
unsigned char *out_buf = new unsigned char[out_buf_size];
205+
int nReturnCode = nv_encodere->encodeFromBuffer(buffer, JCS_YCbCr, &out_buf, out_buf_size, quality);
206+
if (0 != nReturnCode){
207+
throw JpegCoderError(nReturnCode, "NvJpeg Encoder Error");
208+
}
209+
210+
JpegCoderBytes* jpegData = new JpegCoderBytes(out_buf, out_buf_size);
211+
return jpegData;
212+
}
213+

0 commit comments

Comments
 (0)