Skip to content

Commit 0c8a76e

Browse files
authored
Add: GStreamer ST20 TX zero copy (#1173)
Add to GStreamer ST20 RX plugin zero-copy MTL implementation, Initial testing shows performance gains. Known issues: unsupported for now, rfc4175_422be10 would require memcpy use due to memory alignment requirements in our API. Same as in TX #1169
1 parent a45f727 commit 0c8a76e

File tree

2 files changed

+174
-33
lines changed

2 files changed

+174
-33
lines changed

ecosystem/gstreamer_plugin/gst_mtl_st20p_tx.c

Lines changed: 173 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,18 @@ typedef struct {
105105
GstCaps* caps;
106106
} GstMtlSt20pTxThreadData;
107107

108+
typedef struct {
109+
GstBuffer* buf;
110+
uint32_t child_count;
111+
pthread_mutex_t parent_mutex;
112+
} GstSt20pTxExternalDataParent;
113+
114+
typedef struct {
115+
GstSt20pTxExternalDataParent* parent;
116+
GstMemory* gst_buffer_memory;
117+
GstMapInfo map_info;
118+
} GstSt20pTxExternalDataChild;
119+
108120
/* pad template */
109121
static GstStaticPadTemplate gst_mtl_st20p_tx_sink_pad_template =
110122
GST_STATIC_PAD_TEMPLATE("sink", GST_PAD_SINK, GST_PAD_ALWAYS,
@@ -137,6 +149,11 @@ static gboolean gst_mtl_st20p_tx_start(GstBaseSink* bsink);
137149

138150
static gboolean gst_mtl_st20p_tx_session_create(Gst_Mtl_St20p_Tx* sink, GstCaps* caps);
139151

152+
static int gst_mtl_st20p_tx_frame_done(void* priv, struct st_frame* frame);
153+
154+
static GstFlowReturn gst_mtl_st20p_tx_zero_copy(Gst_Mtl_St20p_Tx* sink, GstBuffer* buf);
155+
static GstFlowReturn gst_mtl_st20p_tx_mem_copy(Gst_Mtl_St20p_Tx* sink, GstBuffer* buf);
156+
140157
static void gst_mtl_st20p_tx_class_init(Gst_Mtl_St20p_TxClass* klass) {
141158
GObjectClass* gobject_class;
142159
GstElementClass* gstelement_class;
@@ -349,6 +366,14 @@ static gboolean gst_mtl_st20p_tx_session_create(Gst_Mtl_St20p_Tx* sink, GstCaps*
349366
return FALSE;
350367
}
351368

369+
sink->zero_copy = (ops_tx.transport_fmt != st_frame_fmt_to_transport(ops_tx.input_fmt));
370+
if (sink->zero_copy) {
371+
ops_tx.flags |= ST20P_TX_FLAG_EXT_FRAME;
372+
ops_tx.notify_frame_done = gst_mtl_st20p_tx_frame_done;
373+
} else {
374+
GST_WARNING("Using memcpy path");
375+
}
376+
352377
if (info->fps_d != 0) {
353378
ops_tx.fps = st_frame_rate_to_st_fps((double)info->fps_n / info->fps_d);
354379
if (ops_tx.fps == ST_FPS_MAX) {
@@ -457,11 +482,6 @@ static gboolean gst_mtl_st20p_tx_sink_event(GstPad* pad, GstObject* parent,
457482
static GstFlowReturn gst_mtl_st20p_tx_chain(GstPad* pad, GstObject* parent,
458483
GstBuffer* buf) {
459484
Gst_Mtl_St20p_Tx* sink = GST_MTL_ST20P_TX(parent);
460-
gint buffer_size, buffer_n = gst_buffer_n_memory(buf);
461-
struct st_frame* frame = NULL;
462-
gint frame_size = sink->frame_size;
463-
GstMemory* gst_buffer_memory;
464-
GstMapInfo map_info;
465485

466486
if (sink->async_session_create) {
467487
pthread_mutex_lock(&sink->session_mutex);
@@ -480,6 +500,154 @@ static GstFlowReturn gst_mtl_st20p_tx_chain(GstPad* pad, GstObject* parent,
480500
return GST_FLOW_ERROR;
481501
}
482502

503+
if (sink->zero_copy) {
504+
return gst_mtl_st20p_tx_zero_copy(sink, buf);
505+
} else {
506+
return gst_mtl_st20p_tx_mem_copy(sink, buf);
507+
}
508+
}
509+
510+
static void gst_mtl_st20p_tx_finalize(GObject* object) {
511+
Gst_Mtl_St20p_Tx* sink = GST_MTL_ST20P_TX(object);
512+
513+
if (sink->async_session_create) {
514+
if (sink->session_thread) pthread_join(sink->session_thread, NULL);
515+
pthread_mutex_destroy(&sink->session_mutex);
516+
}
517+
518+
if (sink->tx_handle) {
519+
if (st20p_tx_free(sink->tx_handle)) GST_ERROR("Failed to free tx handle");
520+
}
521+
522+
if (sink->mtl_lib_handle) {
523+
if (gst_mtl_common_deinit_handle(&sink->mtl_lib_handle))
524+
GST_ERROR("Failed to uninitialize MTL library");
525+
}
526+
}
527+
528+
static gboolean plugin_init(GstPlugin* mtl_st20p_tx) {
529+
return gst_element_register(mtl_st20p_tx, "mtl_st20p_tx", GST_RANK_SECONDARY,
530+
GST_TYPE_MTL_ST20P_TX);
531+
}
532+
533+
GST_PLUGIN_DEFINE(GST_VERSION_MAJOR, GST_VERSION_MINOR, mtl_st20p_tx,
534+
"software-based solution designed for high-throughput transmission",
535+
plugin_init, PACKAGE_VERSION, GST_LICENSE, GST_PACKAGE_NAME,
536+
GST_PACKAGE_ORIGIN)
537+
538+
static int gst_mtl_st20p_tx_frame_done(void* priv, struct st_frame* frame) {
539+
/* In case of format conversion (transmit vs input), MTL may call
540+
* gst_mtl_st20p_tx_frame_done twice.
541+
* To avoid double free, we set (frame->opaque = NULL) in first call so that the second
542+
* call can exit gracefully.
543+
*/
544+
if (frame == NULL || frame->opaque == NULL) {
545+
return 0;
546+
}
547+
548+
GstSt20pTxExternalDataChild* child = frame->opaque;
549+
GstSt20pTxExternalDataParent* parent = child->parent;
550+
551+
gst_memory_unmap(child->gst_buffer_memory, &child->map_info);
552+
553+
frame->opaque = NULL;
554+
free(child);
555+
556+
pthread_mutex_lock(&parent->parent_mutex);
557+
parent->child_count--;
558+
if (parent->child_count > 0) {
559+
pthread_mutex_unlock(&parent->parent_mutex);
560+
return 0;
561+
}
562+
563+
pthread_mutex_unlock(&parent->parent_mutex);
564+
gst_buffer_unref(parent->buf);
565+
pthread_mutex_destroy(&parent->parent_mutex);
566+
free(parent);
567+
568+
return 0;
569+
}
570+
571+
static GstFlowReturn gst_mtl_st20p_tx_zero_copy(Gst_Mtl_St20p_Tx* sink, GstBuffer* buf) {
572+
GstSt20pTxExternalDataChild* child;
573+
GstSt20pTxExternalDataParent* parent;
574+
struct st_frame* frame;
575+
struct st_ext_frame ext_frame;
576+
GstVideoMeta* video_meta = gst_buffer_get_video_meta(buf);
577+
gint buffer_n = gst_buffer_n_memory(buf);
578+
if (!video_meta) {
579+
g_print("Failed to get video meta from buffer\n");
580+
return GST_FLOW_ERROR;
581+
}
582+
583+
parent = malloc(sizeof(GstSt20pTxExternalDataParent));
584+
if (!parent) {
585+
GST_ERROR("Failed to allocate memory for parent structure");
586+
return GST_FLOW_ERROR;
587+
}
588+
parent->buf = buf;
589+
parent->child_count = buffer_n;
590+
pthread_mutex_init(&parent->parent_mutex, NULL);
591+
592+
for (int i = 0; i < buffer_n; i++) {
593+
child = malloc(sizeof(GstSt20pTxExternalDataChild));
594+
if (!child) {
595+
GST_ERROR("Failed to allocate memory for child structure");
596+
free(parent);
597+
}
598+
child->parent = parent;
599+
child->gst_buffer_memory = gst_buffer_peek_memory(buf, i);
600+
601+
if (!gst_memory_map(child->gst_buffer_memory, &child->map_info, GST_MAP_READ)) {
602+
GST_ERROR("Failed to map memory");
603+
free(child);
604+
free(parent);
605+
return GST_FLOW_ERROR;
606+
}
607+
608+
if (child->map_info.size < sink->frame_size) {
609+
GST_ERROR("Buffer size %lu is smaller than frame size %d", child->map_info.size,
610+
sink->frame_size);
611+
gst_memory_unmap(child->gst_buffer_memory, &child->map_info);
612+
free(child);
613+
free(parent);
614+
return GST_FLOW_ERROR;
615+
}
616+
617+
frame = st20p_tx_get_frame(sink->tx_handle);
618+
if (!frame) {
619+
GST_ERROR("Failed to get frame");
620+
return GST_FLOW_ERROR;
621+
}
622+
623+
// By default, timestamping is handled by MTL.
624+
if (sink->use_pts_for_pacing) {
625+
frame->timestamp = GST_BUFFER_PTS(buf) += sink->pts_for_pacing_offset;
626+
frame->tfmt = ST10_TIMESTAMP_FMT_TAI;
627+
}
628+
629+
for (int i = 0; i < video_meta->n_planes; i++) {
630+
ext_frame.addr[i] = child->map_info.data + video_meta->offset[i];
631+
ext_frame.linesize[i] = video_meta->stride[i];
632+
ext_frame.iova[i] = 0;
633+
}
634+
ext_frame.size = child->map_info.size;
635+
ext_frame.opaque = child;
636+
frame->opaque = NULL;
637+
638+
st20p_tx_put_ext_frame(sink->tx_handle, frame, &ext_frame);
639+
}
640+
641+
return GST_FLOW_OK;
642+
}
643+
644+
static GstFlowReturn gst_mtl_st20p_tx_mem_copy(Gst_Mtl_St20p_Tx* sink, GstBuffer* buf) {
645+
gint buffer_size, buffer_n = gst_buffer_n_memory(buf);
646+
struct st_frame* frame = NULL;
647+
gint frame_size = sink->frame_size;
648+
GstMemory* gst_buffer_memory;
649+
GstMapInfo map_info;
650+
483651
for (int i = 0; i < buffer_n; i++) {
484652
gst_buffer_memory = gst_buffer_peek_memory(buf, i);
485653

@@ -515,31 +683,3 @@ static GstFlowReturn gst_mtl_st20p_tx_chain(GstPad* pad, GstObject* parent,
515683
gst_buffer_unref(buf);
516684
return GST_FLOW_OK;
517685
}
518-
519-
static void gst_mtl_st20p_tx_finalize(GObject* object) {
520-
Gst_Mtl_St20p_Tx* sink = GST_MTL_ST20P_TX(object);
521-
522-
if (sink->async_session_create) {
523-
if (sink->session_thread) pthread_join(sink->session_thread, NULL);
524-
pthread_mutex_destroy(&sink->session_mutex);
525-
}
526-
527-
if (sink->tx_handle) {
528-
if (st20p_tx_free(sink->tx_handle)) GST_ERROR("Failed to free tx handle");
529-
}
530-
531-
if (sink->mtl_lib_handle) {
532-
if (gst_mtl_common_deinit_handle(&sink->mtl_lib_handle))
533-
GST_ERROR("Failed to uninitialize MTL library");
534-
}
535-
}
536-
537-
static gboolean plugin_init(GstPlugin* mtl_st20p_tx) {
538-
return gst_element_register(mtl_st20p_tx, "mtl_st20p_tx", GST_RANK_SECONDARY,
539-
GST_TYPE_MTL_ST20P_TX);
540-
}
541-
542-
GST_PLUGIN_DEFINE(GST_VERSION_MAJOR, GST_VERSION_MINOR, mtl_st20p_tx,
543-
"software-based solution designed for high-throughput transmission",
544-
plugin_init, PACKAGE_VERSION, GST_LICENSE, GST_PACKAGE_NAME,
545-
GST_PACKAGE_ORIGIN)

ecosystem/gstreamer_plugin/gst_mtl_st20p_tx.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ struct _Gst_Mtl_St20p_Tx {
5959
mtl_handle mtl_lib_handle;
6060
st20p_tx_handle tx_handle;
6161
guint frame_size;
62+
gboolean zero_copy;
6263

6364
gboolean session_ready;
6465
pthread_mutex_t session_mutex;

0 commit comments

Comments
 (0)