/**
@file
@brief Message transport between kernel and userspace
@details Copyright (c) 2017-2021 Acronis International GmbH
@author Mikhail Krivtsov (mikhail.krivtsov@acronis.com)
@since $Id: $
*/
#include "transport.h"
#include "compat.h"
#include "debug.h"
#include "device.h"
#include "memory.h"
#include "message.h"
#include "ring.h"
#include "set.h"
#include "syscall_common.h"
#include "task_info_map.h"
#include "tracepoints.h"
#include <asm/io.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/jiffies.h> // msecs_to_jiffies()
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h> // copy_from_user(), copy_to_user()
#include <linux/wait.h> // wait_event*(), wake_up*()
#define TRANSPORT_MSG_SIZE_MAX (1<<10)
#define TRANSPORT_QUEUE_CAPACITY (0x1000 / sizeof(msg_t *))
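// With 8-byte pointers this works out to 0x1000 / 8 = 512 queued message pointers,
// i.e. one page worth of 'msg_t *' slots.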
#define TRANSPORT_WAIT_REPLY_TIMEOUT_MSECS (60*1000)
#define TRANSPORT_PRINTF(format, args...) DPRINTF(format, ##args)
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#define DATA_QUEUE_HEADER_SIZE sizeof(shared_data_queue_t)
#define DATA_QUEUE_ENTRY_HEADER_SIZE sizeof(data_queue_entry_t)
typedef struct {
struct list_head transport_list_node;
pid_t control_tgid;
atomic64_t events_mask;
// FIXME: Use 'msg_wait_queue.lock' instead of 'msg_spinlock'
// #define msg_spinlock msg_wait_queue.lock
spinlock_t msg_spinlock;
wait_queue_head_t msg_wait_queue;
bool shutdown;
ring_t msg_ring;
// sent messages waiting for 'reply'
set_t sent_msgs_set;
uint32_t queue_size;
shared_data_queue_t *queue;
atomic_t queue_event;
} transport_t;
typedef struct {
struct mutex transport_mutex;
unsigned transport_count;
atomic64_t combined_events_mask;
spinlock_t transport_spinlock;
struct list_head transport_list;
msg_id_t msg_id_sequence;
} transport_global_t;
static transport_global_t transport_global;
// Virtual memory allocators that are suitable for 'mmap'
static void *virt_mem_alloc(size_t size)
{
gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP | __GFP_NORETRY;
return (void *) __get_free_pages(gfp_flags, get_order(size));
}
static void virt_mem_free(void *ptr, size_t size)
{
struct page *page;
if (!ptr)
return;
page = virt_to_head_page(ptr);
__free_pages(page, get_order(size));
}
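// The '__GFP_COMP' flag matters here: the allocation is a physically contiguous compound
// page block, which is what lets 'transport_data_queue_mmap()' below expose it to userspace
// with a single 'remap_pfn_range()' call and bound the mapping size via 'compound_order()'.
// A minimal sketch of the order arithmetic (illustrative only, not compiled):
#if 0
static void virt_mem_alloc_order_example(void)
{
	// get_order() rounds the byte size up to a power-of-two number of pages,
	// e.g. with 4 KiB pages a 20 KiB request becomes order 3 (8 pages, 32 KiB).
	size_t requested = 20 * 1024;
	int order = get_order(requested);
	pr_info("requested=%zu order=%d bytes=%lu\n",
		requested, order, PAGE_SIZE << order);
}
#endif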
static void transport_global_init(void)
{
mutex_init(&transport_global.transport_mutex);
transport_global.transport_count = 0;
spin_lock_init(&transport_global.transport_spinlock);
INIT_LIST_HEAD(&transport_global.transport_list);
transport_global.msg_id_sequence = 0;
}
static void transport_global_register(transport_t *transport)
{
spin_lock(&transport_global.transport_spinlock);
list_add_tail(&transport->transport_list_node, &transport_global.transport_list);
spin_unlock(&transport_global.transport_spinlock);
}
static void transport_global_unregister(transport_t *transport)
{
spin_lock(&transport_global.transport_spinlock);
if (!list_empty(&transport->transport_list_node)) {
list_del_init(&transport->transport_list_node);
}
spin_unlock(&transport_global.transport_spinlock);
}
static bool transport_is_control_tgid_impl(pid_t tgid)
{
transport_t *transport;
bool is_control_tgid = false;
list_for_each_entry(transport, &transport_global.transport_list, transport_list_node) {
if (tgid == transport->control_tgid) {
is_control_tgid = true;
break;
}
}
return is_control_tgid;
}
static bool transport_is_control_tgid(pid_t tgid)
{
bool is_control_tgid;
spin_lock(&transport_global.transport_spinlock);
is_control_tgid = transport_is_control_tgid_impl(tgid);
spin_unlock(&transport_global.transport_spinlock);
return is_control_tgid;
}
static msg_id_t transport_global_sequence_next_impl(void)
{
msg_id_t msg_id = ++transport_global.msg_id_sequence;
if (!msg_id) {
msg_id = ++transport_global.msg_id_sequence;
}
DPRINTF("msg_id=%llX", msg_id);
return msg_id;
}
static msg_id_t transport_global_sequence_next(void)
{
// TODO DK: use 'atomic64' instead of spinlock
msg_id_t msg_id;
spin_lock(&transport_global.transport_spinlock);
msg_id = transport_global_sequence_next_impl();
spin_unlock(&transport_global.transport_spinlock);
return msg_id;
}
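// The TODO above could be addressed roughly as sketched below: an 'atomic64_t' sequence
// removes the need to take 'transport_spinlock' just to hand out an id, while still skipping
// the reserved value 0 (an id of 0 marks an 'async' message). This is only a sketch under the
// assumption that 'msg_id_t' stays 64-bit wide; it is not what the driver does today.
#if 0
static atomic64_t msg_id_sequence_atomic = ATOMIC64_INIT(0);

static msg_id_t transport_global_sequence_next_lockless(void)
{
	msg_id_t msg_id = (msg_id_t) atomic64_inc_return(&msg_id_sequence_atomic);
	if (!msg_id) {
		// 0 is reserved for 'async' messages, take the next value instead
		msg_id = (msg_id_t) atomic64_inc_return(&msg_id_sequence_atomic);
	}
	return msg_id;
}
#endif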
static void transport_global_recalculate_combined_event_mask_impl(void)
{
transport_t *transport;
uint64_t combined_mask = 0;
list_for_each_entry(transport, &transport_global.transport_list, transport_list_node) {
barrier();
combined_mask |= atomic64_read(&transport->events_mask);
barrier();
}
barrier();
atomic64_set(&transport_global.combined_events_mask, combined_mask);
}
static void transport_global_recalculate_combined_event_mask(void)
{
spin_lock(&transport_global.transport_spinlock);
transport_global_recalculate_combined_event_mask_impl();
spin_unlock(&transport_global.transport_spinlock);
}
uint64_t transport_global_get_combined_mask(void)
{
return atomic64_read(&transport_global.combined_events_mask);
}
static void drop_msgs_impl(ring_t *ring)
{
while (!ring_is_empty(ring)) {
msg_t *msg = *(msg_t **) ring_consumer_ptr(ring);
msg_unref(msg);
ring_consumer_index_move_one(ring);
}
}
/*
'msg ref/unref' for messages stored in 'sent_msgs_set' are invoked in
'msg_reply_wait_count inc/dec'.
There is no need for separate 'msg ref/unref' calls.
*/
static void drop_sent_msgs_impl(set_t *set)
{
void *item_ptr = set_begin_ptr(set);
void *end_ptr = set_end_ptr(set);
while (item_ptr < end_ptr) {
msg_t *msg = *(msg_t **) item_ptr;
msg_reply_wait_count_dec(msg);
item_ptr = set_ptr_next(set, item_ptr);
}
set->count = 0;
}
static void transport_shutdown(transport_t *transport)
{
DPRINTF("transport=%p", transport);
spin_lock(&transport->msg_spinlock);
{
atomic64_set(&transport->events_mask, 0);
transport->shutdown = true;
// Discard undelivered messages
drop_msgs_impl(&transport->msg_ring);
// Discard messages waiting for 'reply'
drop_sent_msgs_impl(&transport->sent_msgs_set);
}
spin_unlock(&transport->msg_spinlock);
// wakeup all userspace 'read' waiters
wake_up_all(&transport->msg_wait_queue);
}
// identify and shut down a transport that failed to reply
static void transport_shutdown_msg(transport_t *transport, msg_t *unreplied_msg)
{
bool found = false;
DPRINTF("transport=%p unreplied_msg=%p", transport, unreplied_msg);
spin_lock(&transport->msg_spinlock);
{
void *item_ptr = set_begin_ptr(&transport->sent_msgs_set);
void *end_ptr = set_end_ptr(&transport->sent_msgs_set);
while (item_ptr < end_ptr) {
if (unreplied_msg == *(msg_t **) item_ptr) {
found = true;
break;
}
item_ptr = set_ptr_next(&transport->sent_msgs_set, item_ptr);
}
}
spin_unlock(&transport->msg_spinlock);
if (found) {
WPRINTF("deactivating transport on reply wait timeout");
transport_shutdown(transport);
}
}
// identify and shut down a transport that failed to reply
static void transport_global_shutdown_msg(msg_t *unreplied_msg)
{
DPRINTF("unreplied_msg=%p", unreplied_msg);
spin_lock(&transport_global.transport_spinlock);
{
transport_t *transport;
transport_t *next_transport;
list_for_each_entry_safe(transport, next_transport,
&transport_global.transport_list,
transport_list_node) {
transport_shutdown_msg(transport, unreplied_msg);
}
}
spin_unlock(&transport_global.transport_spinlock);
}
static void transport_free(transport_t *transport)
{
DPRINTF("transport=%p", transport);
transport_global_unregister(transport);
transport_shutdown(transport);
IPRINTF("message queue items_count_max=%u capacity=%u",
ring_items_count_max(&transport->msg_ring),
ring_capacity(&transport->msg_ring));
IPRINTF("sent_msgs_set items_count_max=%u capacity=%u",
set_items_count_max(&transport->sent_msgs_set),
set_capacity(&transport->sent_msgs_set));
mem_free(ring_buffer(&transport->msg_ring));
mem_free(set_buffer(&transport->sent_msgs_set));
if (transport->queue) {
virt_mem_free(transport->queue, transport->queue_size + DATA_QUEUE_HEADER_SIZE);
}
mem_free(transport);
}
static bool transport_ring_init(ring_t *ring)
{
size_t buffer_size = TRANSPORT_QUEUE_CAPACITY * sizeof(msg_t *);
msg_t **msgs;
bool success;
if (!buffer_size) {
msgs = NULL;
success = true;
} else {
msgs = mem_alloc0(buffer_size);
success = (bool) msgs;
}
ring_init(ring, msgs, buffer_size, sizeof(msg_t *));
return success;
}
static bool transport_set_init(set_t *set)
{
size_t buffer_size = TRANSPORT_QUEUE_CAPACITY * sizeof(msg_t *);
msg_t **msgs;
bool success;
if (!buffer_size) {
msgs = NULL;
success = true;
} else {
msgs = mem_alloc0(buffer_size);
success = (bool) msgs;
}
set_init(set, msgs, buffer_size, sizeof(msg_t *));
return success;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Shared with userspace Data Queue implementation
// Fallback definitions for the compiler atomic helpers. Only x86_64 is covered here; on other platforms these are NOT valid.
// Other platforms must provide their own implementations of 'smp_*' and 'READ_ONCE'/'WRITE_ONCE'.
// These convenience memory-ordering helpers appeared upstream around Linux 3.12 and Linux 3.18.
// We assume there is no need to support architectures other than x86_64 on kernels that old.
#ifndef READ_ONCE
#define __READ_ONCE_SIZE \
({ \
switch (size) { \
case 1: *(__u8 *)res = *(volatile __u8 *)p; break; \
case 2: *(__u16 *)res = *(volatile __u16 *)p; break; \
case 4: *(__u32 *)res = *(volatile __u32 *)p; break; \
case 8: *(__u64 *)res = *(volatile __u64 *)p; break; \
default: \
barrier(); \
__builtin_memcpy((void *)res, (const void *)p, size); \
barrier(); \
} \
})
static void _do_read_once_size(const volatile void *p, void *res, int size)
{
__READ_ONCE_SIZE;
}
#define READ_ONCE(x) \
({ \
union { typeof(x) __val; char __c[1]; } __u; \
_do_read_once_size(&(x), __u.__c, sizeof(x)); \
smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
__u.__val; \
})
#endif
#ifndef WRITE_ONCE
static void _do_write_once_size(volatile void *p, void *res, int size)
{
switch (size) {
case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
default:
barrier();
__builtin_memcpy((void *)p, (const void *)res, size);
barrier();
}
}
#define WRITE_ONCE(x, val) \
({ \
union { typeof(x) __val; char __c[1]; } __u = \
{ .__val = (__force typeof(x)) (val) }; \
_do_write_once_size(&(x), __u.__c, sizeof(x)); \
__u.__val; \
})
#endif
#ifndef smp_store_release
#define smp_store_release(p, v) \
do { \
barrier(); \
WRITE_ONCE(*p, v); \
} while (0)
#endif
#ifndef smp_load_acquire
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = READ_ONCE(*p); \
barrier(); \
___p1; \
})
#endif
#define DATA_QUEUE_ENTRY_AT(queue, v) (data_queue_entry_t*)((uint8_t *)queue->entries + v)
#ifdef ROUND_UP
#undef ROUND_UP
#endif
#define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S))
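// For example, ROUND_UP(13, 4) == 16 and ROUND_UP(16, 4) == 16; below it keeps the published
// 'tail' aligned to sizeof(uint32_t) so the next entry header starts on an aligned offset.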
static inline void data_queue_write_new_entry(shared_data_queue_t *queue, uint32_t offset, void *data, uint32_t data_size)
{
data_queue_entry_t *entry = DATA_QUEUE_ENTRY_AT(queue, offset);
// Technically this 'WRITE_ONCE' is useless but let's follow the rules
WRITE_ONCE(entry->size, data_size);
// Not protecting the memcpy with 'barrier' call, we will use 'smp_*' later anyways
memcpy(&entry->data, data, data_size);
}
// This function is called from the data queue 'writer' under the spin_lock because it is NOT thread-safe.
// As such, reads from 'queue->tail' can use 'READ_ONCE' (relaxed), while writes to it must use 'smp_store_release'.
// The 'reader' may alter 'queue->head', so 'smp_load_acquire' must be used to read it; writing it here is NOT allowed.
static bool transport_shared_data_queue_enqueue_impl(transport_t *transport, void *data, uint32_t data_size)
{
uint32_t head, tail, new_tail;
// 'entry_size' is the size of the whole 'data_queue_entry' including the header;
// the entry itself stores 'data_size' instead, to spare the reader redundant checks.
uint32_t entry_size = data_size + DATA_QUEUE_ENTRY_HEADER_SIZE;
shared_data_queue_t *queue = transport->queue;
uint32_t queue_size = transport->queue_size;
// Note that no heavy memory barriers are issued here to load 'tail' and 'head';
// stronger barriers are issued later, only if they turn out to be necessary.
// !!! 'head' might not be synchronized with the 'reader'; that is OK and is handled at the end.
tail = READ_ONCE(queue->tail);
head = smp_load_acquire(&queue->head);
// Check for unreasonable 'tail' or 'head', must never happen.
if (queue_size < tail || queue_size < head) {
WPRINTF("Invalid tail/head detected: tail=%u, head=%u, size=%u"
, (unsigned) tail, (unsigned) head, (unsigned) queue_size);
return false;
}
// Start inserting the contents of 'data' in the shared data queue
if (tail >= head) {
// Tail is ahead of head; this is the regular scenario. Handle it
// head tail
// V V
// -----|*************|-----------------
// ^ ^
// data to be dequeued |
// free space
if ((tail + entry_size) <= queue_size) {
// There is enough buffer in the 'tail' of the queue, write the entry and move the tail
// head tail new_tail
// V V V
// -----|*************|+++++++|-------
// ^
// new entry
data_queue_write_new_entry(queue, tail /*off*/, data, data_size);
new_tail = tail + entry_size;
} else if (head > entry_size) {
// The first condition did not hold, so the data cannot be placed after 'tail'.
// Wrap around to the start of the buffer; there is enough space before the userspace 'head'.
// head tail
// V V
// |++++++|------------|*************|?? <- zapped entry w/ size>queue_size-tail, if fits
// ^ ^
// off new_tail
// Need to tell userspace that the current entry is too long to fit before the wrap point.
// If there is not enough space even for an entry header, do nothing.
// Otherwise, deliberately zap the trailing entry by writing a 'data_size' that is too big to fit.
if ((queue_size - tail) >= DATA_QUEUE_ENTRY_HEADER_SIZE) {
data_queue_entry_t *entry_to_zap = DATA_QUEUE_ENTRY_AT(queue, tail);
entry_to_zap->size = data_size;
// do not touch 'entry_to_zap->data', it is bogus; the entry only tells the reader to wrap to the start
}
// Write data at the beginning of the queue
data_queue_write_new_entry(queue, 0 /*off*/, data, data_size);
new_tail = /*off==0 + */ entry_size;
} else {
// There is neither enough space after 'tail' nor before 'head', bail
return false;
}
} else {
// 'tail' has wrapped and is catching up to 'head' from the other side.
// tail head
// V V
// ****|--------------|***************
// Insert can still be done if 'head' will not be overrun
if ((head - tail) > entry_size) {
// tail head
// V V
// ****|+++++|------|***************
// ^
// new_tail
data_queue_write_new_entry(queue, tail, data, data_size);
new_tail = tail + entry_size;
} else {
// There is not enough space without overrunning 'head', bail
return false;
}
}
// Expose everything written in this thread, with 'release' semantics.
// The reader must do 'smp_load_acquire' on the same variable ('tail') to see the written 'entries'.
// !!! This logic does NOT force 'tail' as seen by the 'reader' to equal 'tail' in the 'writer'.
new_tail = ROUND_UP(new_tail, sizeof(uint32_t));
smp_store_release(&queue->tail, new_tail);
// The new tail has been published to the 'queue', but is it necessary to notify the 'reader'?
// If 'tail == head' at the beginning, userspace has finished reading all the content and is
// about to wait, or is already waiting, for the 'event'.
// In that case it is clear that we must notify the 'reader' no matter what.
// Moreover, the 'reader' cannot move 'head' past 'tail', so the value observed is guaranteed
// to be the latest 'head' published by the reader.
if (tail != head) {
// If 'tail != head', it is less clear. If userspace happened to move 'head' up to the old
// 'tail' while the 'writer' was adding the new entry, the 'reader' will go to 'wait'.
// So we must refresh 'head' to make sure we really do not need to wake up the 'reader'.
// The opposite situation is also possible: the update of 'head' may not be visible yet,
// since atomic ops do not imply barriers. We need to make sure userspace keeps consuming
// events now that the new 'tail' has been written. Whenever userspace detects that its
// current 'tail == head', it performs 'smp_mb' and re-reads the 'tail' we just wrote to
// check whether there is anything left to consume.
smp_mb();
head = READ_ONCE(queue->head);
}
if (tail == head) {
// atomic_ops.rst: atomic_read() and atomic_set() DO NOT IMPLY BARRIERS!
atomic_set(&transport->queue_event, 1);
// The data queue was empty, so wake up the 'reader' that is waiting for us.
// Use 'smp_wmb' to make sure the 'tail' we stored is visible to the user.
// If 'smp_mb' was already done above, that is fine too; it pairs with the reader's 'smp_rmb'.
// 'smp_wmb' also ensures the 'atomic_set' of 'queue_event' is visible.
smp_wmb();
wake_up_interruptible(&transport->msg_wait_queue);
TRANSPORT_PRINTF("woken up ht=%u nt=%u", tail, new_tail);
}
return true;
}
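// The userspace 'reader' side is not part of this file, but the pairing implied by the writer
// above would look roughly like the sketch below: acquire-load 'tail', consume entries starting
// at 'head', treat an entry that does not fit before 'queue_size' as the wrap marker ("zapped"
// entry), and release-store the new 'head'. The struct field names match their use in this file;
// everything else (function name, exact wrap handling) is an assumption, not the reference
// consumer implementation.
#if 0
static void consume_entries_sketch(shared_data_queue_t *queue, uint32_t queue_size)
{
	uint32_t head = queue->head;                      // only the reader writes 'head'
	uint32_t tail = smp_load_acquire(&queue->tail);   // pairs with smp_store_release() above

	while (head != tail) {
		data_queue_entry_t *entry;

		if ((queue_size - head) < DATA_QUEUE_ENTRY_HEADER_SIZE) {
			head = 0;                         // no room left even for a header: wrap
			continue;
		}
		entry = DATA_QUEUE_ENTRY_AT(queue, head);
		if (head + DATA_QUEUE_ENTRY_HEADER_SIZE + entry->size > queue_size) {
			head = 0;                         // "zapped" trailing entry: wrap
			continue;
		}
		// ... hand entry->data / entry->size to the consumer here ...
		head += DATA_QUEUE_ENTRY_HEADER_SIZE + entry->size;
		head = ROUND_UP(head, sizeof(uint32_t));
	}
	smp_store_release(&queue->head, head);            // publish progress to the writer
}
#endif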
static long transport_queue_events_available(transport_t *transport)
{
uint32_t tail, head;
int ev;
shared_data_queue_t *queue = READ_ONCE(transport->queue);
smp_rmb();
ev = atomic_xchg(&transport->queue_event, 0);
if (ev) {
TRANSPORT_PRINTF("check ev active");
return 1;
}
// This should not be necessary but doing it just in case.
tail = READ_ONCE(queue->tail);
head = READ_ONCE(queue->head);
TRANSPORT_PRINTF("check h=%u t=%u", head, tail);
return transport->shutdown || (head != tail);
}
// This function is called whenever the userspace 'reader' has decided there are no more events to read.
// It waits on 'msg_wait_queue' for the data queue to gain new content.
// The 'writer' calls 'wake_up_interruptible' when it detects that the queue was empty before its insert.
static long transport_data_queue_wait(transport_t *transport)
{
shared_data_queue_t *queue = READ_ONCE(transport->queue);
long ret;
if (!queue) {
return -EINVAL;
}
if (wait_event_interruptible_exclusive(transport->msg_wait_queue, transport_queue_events_available(transport))) {
ret = -EINTR;
} else {
if (transport->shutdown) {
ret = -EIO;
} else {
ret = 0;
}
}
return ret;
}
static int transport_data_queue_mmap(transport_t *transport, struct vm_area_struct *vma)
{
loff_t offset = (loff_t) vma->vm_pgoff << PAGE_SHIFT;
unsigned long sz = vma->vm_end - vma->vm_start;
unsigned long pfn;
struct page *page;
void *ptr;
if (0 != offset) {
return -EINVAL;
}
ptr = READ_ONCE(transport->queue);
if (!ptr) {
return -EINVAL;
}
page = virt_to_head_page(ptr);
if (sz > (PAGE_SIZE << compound_order(page))) {
return -EINVAL;
}
pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
}
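// From userspace the queue is obtained by mmap()ing the character device at offset 0 with a
// length no larger than the size passed to AT_INIT_SHARED_DATA_QUEUE. A minimal sketch, assuming
// an already opened device fd and an already negotiated 'queue_bytes' value:
#if 0
#include <stddef.h>
#include <sys/mman.h>

static void *map_shared_queue_sketch(int dev_fd, size_t queue_bytes)
{
	// PROT_READ | PROT_WRITE: the reader must be able to publish 'head' back to the kernel.
	void *mem = mmap(NULL, queue_bytes, PROT_READ | PROT_WRITE, MAP_SHARED, dev_fd, 0);
	return (mem == MAP_FAILED) ? NULL : mem;
}
#endif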
static long data_queue_create(const data_queue_params_t *params, shared_data_queue_t **pqueue)
{
shared_data_queue_t *queue;
uint32_t size = params->size;
if (size <= DATA_QUEUE_HEADER_SIZE)
return -EINVAL;
queue = (shared_data_queue_t*) virt_mem_alloc(size);
if (!queue)
return -ENOMEM;
*pqueue = queue;
return 0;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static bool transport_send_msg_nowait(transport_t *transport, msg_t *msg)
{
bool need_wakeup = false;
msg_type_t mt = MSG_TYPE(msg);
if (mt >= MT_FIRST_ACTIVITY_EVENT
&& !(atomic64_read(&transport->events_mask) & MSG_TYPE_TO_EVENT_MASK(mt))) {
return false;
}
spin_lock(&transport->msg_spinlock);
{
if (transport->shutdown) {
spin_unlock(&transport->msg_spinlock);
return false;
}
if (msg_img_is_reply_required(MSG_IMG(msg))) {
unsigned item_index;
if (set_is_full(&transport->sent_msgs_set)) {
WPRINTF("'sent_msgs_set' overflow (capacity=%u)", set_capacity(&transport->sent_msgs_set));
spin_unlock(&transport->msg_spinlock);
transport_shutdown(transport);
return false;
}
item_index = set_items_count(&transport->sent_msgs_set);
/*
'msg ref/unref' for messages stored in 'sent_msgs_set' are invoked in
'msg_reply_wait_count inc/dec'.
There is no need for separate 'msg ref/unref' calls.
*/
*(msg_t **) set_item_ptr(&transport->sent_msgs_set, item_index) = msg_reply_wait_count_inc(msg);
set_items_count_set(&transport->sent_msgs_set, item_index + 1);
}
if (transport->queue) {
need_wakeup = false;
if (!transport_shared_data_queue_enqueue_impl(transport, MSG_IMG(msg), MSG_SIZE(msg))) {
WPRINTF("mmaped queue overflow");
spin_unlock(&transport->msg_spinlock);
transport_shutdown(transport);
return false;
}
} else {
need_wakeup = true;
if (ring_is_full(&transport->msg_ring)) {
WPRINTF("message queue overflow (capacity=%u)", ring_capacity(&transport->msg_ring));
spin_unlock(&transport->msg_spinlock);
transport_shutdown(transport);
return false;
}
*(msg_t **) ring_producer_ptr(&transport->msg_ring) = msg_ref(msg);
ring_producer_index_move_one(&transport->msg_ring);
}
}
spin_unlock(&transport->msg_spinlock);
if (need_wakeup) {
// wakeup userspace reader
wake_up_interruptible_sync(&transport->msg_wait_queue);
}
return true;
}
static bool transport_send_hello_nowait(transport_t *transport)
{
msg_t *msg = hello_msg_new();
bool success;
if (!msg) {
success = false;
} else {
success = transport_send_msg_nowait(transport, msg);
msg_unref(msg);
}
return success;
}
static bool send_msg_nowait(msg_t *msg)
{
bool sent = false;
spin_lock(&transport_global.transport_spinlock);
if (!transport_is_control_tgid_impl(current->tgid)) {
transport_t *transport;
transport_t *next_transport;
list_for_each_entry_safe(transport, next_transport,
&transport_global.transport_list,
transport_list_node) {
sent |= transport_send_msg_nowait(transport, msg);
}
}
spin_unlock(&transport_global.transport_spinlock);
return sent;
}
static transport_t *transport_new(void)
{
transport_t *transport = mem_alloc0(sizeof(transport_t));
if (transport) {
INIT_LIST_HEAD(&transport->transport_list_node);
// remember the client process doing 'open' so it can be auto-ignored
transport->control_tgid = current->tgid;
spin_lock_init(&transport->msg_spinlock);
init_waitqueue_head(&transport->msg_wait_queue);
atomic64_set(&transport->events_mask, 0);
transport->shutdown = false;
transport->queue = NULL;
atomic_set(&transport->queue_event, 0);
if (transport_ring_init(&transport->msg_ring)
&& transport_set_init(&transport->sent_msgs_set)
&& 0 == task_info_status_set(current->tgid, TS_IGNORE)
&& transport_send_hello_nowait(transport)) {
transport_global_register(transport);
} else {
transport_free(transport);
transport = NULL;
}
}
DPRINTF("transport=%p", transport);
return transport;
}
int __init transport_mod_init(void)
{
int ret;
transport_global_init();
ret = device_mod_init();
if (ret) {
EPRINTF("'device_mod_init()' failure %i", ret);
}
return ret;
}
void transport_mod_down(void)
{
DPRINTF("");
device_mod_down();
DPRINTF("");
}
static msg_t *transport_lookup_msg_ref(transport_t *transport, msg_id_t reply_id) {
msg_t* msg = NULL;
DPRINTF("");
// TODO DK: Is it possible to use radix tree here instead?
spin_lock(&transport->msg_spinlock);
{
void *item_ptr = set_begin_ptr(&transport->sent_msgs_set);
void *end_ptr = set_end_ptr(&transport->sent_msgs_set);
while (item_ptr < end_ptr) {
msg_t *query = *(msg_t **) item_ptr;
if (MSG_ID(query) == reply_id) {
msg = query;
msg_ref(msg);
goto unlock;
}
item_ptr = set_ptr_next(&transport->sent_msgs_set, item_ptr);
}
}
unlock:
spin_unlock(&transport->msg_spinlock);
DPRINTF("ret=%p", msg);
return msg;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static long transport_ioctl_pid_info(msg_varsized_t *reply_msg, msg_sized_t *query_msg)
{
long ret;
if (query_msg->img_size < (sizeof(msg_img_t) + sizeof(get_pid_info_img_t))) {
EPRINTF("'%s' message is too short", action_type_to_string(MSG_TYPE(query_msg)));
ret = -EINVAL;
} else {
msg_img_t *msg_img = MSG_IMG(query_msg);
get_pid_info_img_t *img = IMG_PAYLOAD(msg_img);
pid_t pid = img->pid;
ret = pid_info_return_msg_new(reply_msg, pid);
}
DPRINTF("ret=%li", ret);
return ret;
}
static long transport_ioctl_fs_root(msg_varsized_t *reply_msg, msg_sized_t *query_msg)
{
long ret;
if (query_msg->img_size < (sizeof(msg_img_t) + sizeof(get_fs_root_img_t))) {
EPRINTF("'%s' message is too short", action_type_to_string(MSG_TYPE(query_msg)));
ret = -EINVAL;
} else {
msg_img_t *msg_img = MSG_IMG(query_msg);
get_fs_root_img_t *img = IMG_PAYLOAD(msg_img);
pid_t pid = img->pid;
ret = fs_root_return_msg_new(reply_msg, pid);
}
DPRINTF("ret=%li", ret);
return ret;
}
static long install_file(struct file *file, int fd, int *pfd)
{
if (IS_ERR(file)) {
put_unused_fd(fd);
return PTR_ERR(file);
}
// file is consumed so no need to call 'fput'
fd_install(fd, file);
*pfd = fd;
return 0;
}
static long open_file_with_flags(const char *full_path, int uflags, int mode, int *pfd)
{
struct file *file;
int flags;
int fd;
DPRINTF("Opening file '%s', uflags '%d', mode '%d'", full_path, uflags, mode);
fd = get_unused_fd_compat();
if (fd < 0) {
return fd;
}
flags = uflags
#ifdef O_LARGEFILE
| O_LARGEFILE
#endif
#ifdef O_NOATIME
| O_NOATIME
#endif
// 'FMODE_NONOTIFY' refers to 'fanotify' not scanning the file.
#ifdef FMODE_NONOTIFY
| FMODE_NONOTIFY
#endif
;
file = filp_open(full_path, flags, mode);
return install_file(file, fd, pfd);
}
static inline long open_file(struct path *path, int *pfd)
{
struct file *file;
int flags;
int fd;
if (!path->dentry && !path->mnt) {
return -ENOENT;
}
fd = get_unused_fd_compat();
if (fd < 0) {
return fd;
}
flags = O_RDONLY
#ifdef O_LARGEFILE
| O_LARGEFILE
#endif
#ifdef O_NOATIME
| O_NOATIME
#endif
// 'FMODE_NONOTIFY' refers to 'fanotify' not scanning the file.
#ifdef FMODE_NONOTIFY
| FMODE_NONOTIFY
#endif
;
file = dentry_open_compat(path, flags);
if (IS_ERR(file)) {
// If open failed, let's try to open via the path.
// Notice that this open will be inside 'client' service context
// so this 'filp_open' has a good chance of failing as
// 'mount namespaces' might be different in the process.
// Perhaps a proper solution would be opening the file inside the original
// process context, but that would result in having to create 'file'
// early or to do extra context switches to the scanned process.
// Either way seems inefficient so it is currently avoided.
size_t size = PAGE_SIZE;
char *buf = mem_alloc(size);
const char *full_path;
if (!buf) {
return -ENOMEM;
}
full_path = d_path(path, buf, size);
if (!IS_ERR(full_path)) {
file = filp_open(full_path, flags, 0);
}
mem_free(buf);
}
return install_file(file, fd, pfd);
}
static long transport_ioctl_handle_open_file_from_msg(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg)
{
long ret;
msg_t* msg;
msg = transport_lookup_msg_ref(transport, MSG_ID(query_msg));
if (!msg) {
ret = -EINVAL;
} else {
int fd = -1;
struct path path;
thread_safe_path_load(&msg->path, &path);
msg_unref(msg);
ret = open_file(&path, &fd);
path_put(&path);
if (0 == ret) {
ret = open_file_return_msg_new(reply_msg, fd);
}
}
return ret;
}
static long transport_ioctl_handle_open_file_by_path(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg)
{
long ret;
char *path;
size_t pathSize;
int fd = -1;
msg_img_t *msg_img = MSG_IMG(query_msg);
open_file_by_path_img_t *img = IMG_PAYLOAD(msg_img);
if (MSG_SIZE(query_msg) <= sizeof(msg_img_t) + sizeof(open_file_by_path_img_t))
return -EINVAL;
path = img->path;
pathSize = MSG_SIZE(query_msg) - (sizeof(msg_img_t) + sizeof(open_file_by_path_img_t));
path[pathSize - 1] = '\0';
ret = open_file_with_flags(path, img->flags, img->mode, &fd);
if (0 == ret) {
ret = open_file_return_msg_new(reply_msg, fd);
}
return ret;
}
static long transport_ioctl_handle_get_version(msg_varsized_t *reply_msg)
{
return version_info_return_msg_new(reply_msg);
}
static long transport_ioctl_handle_data_queue_init(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg)
{
msg_img_t *msg_img = MSG_IMG(query_msg);
data_queue_params_t *params = IMG_PAYLOAD(msg_img);
long err;
shared_data_queue_t *queue;
uint32_t queue_size;
if (MSG_SIZE(query_msg) < sizeof(msg_img_t) + sizeof(data_queue_params_t))
return -EINVAL;
err = data_queue_create(params, &queue);
if (err) {
return err;
}
queue_size = params->size - DATA_QUEUE_HEADER_SIZE;
{
spin_lock(&transport->msg_spinlock);
if (transport->queue) {
spin_unlock(&transport->msg_spinlock);
virt_mem_free(queue, params->size);
return -EEXIST;
}
transport->queue = queue;
transport->queue_size = queue_size;
spin_unlock(&transport->msg_spinlock);
}
return data_queue_offsets_return_msg_new(reply_msg, queue_size);
}
static long transport_ioctl_write_read_msg(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg)
{
long ret;
action_type_t action_type;
if (MSG_REPLY(query_msg)) {
EPRINTF("'reply' ioctl is not supported");
ret = -EINVAL;
goto out;
}
action_type = MSG_TYPE(query_msg);
switch (action_type) {
case AT_GET_PID_INFO:
ret = transport_ioctl_pid_info(reply_msg, query_msg);
break;
case AT_GET_FS_ROOT:
ret = transport_ioctl_fs_root(reply_msg, query_msg);
break;
case AT_OPEN_FILE_FROM_MSG:
ret = transport_ioctl_handle_open_file_from_msg(transport, reply_msg, query_msg);
break;
case AT_OPEN_FILE_BY_PATH:
ret = transport_ioctl_handle_open_file_by_path(transport, reply_msg, query_msg);
break;
case AT_GET_VERSION:
ret = transport_ioctl_handle_get_version(reply_msg);
break;
case AT_INIT_SHARED_DATA_QUEUE:
ret = transport_ioctl_handle_data_queue_init(transport, reply_msg, query_msg);
break;
default:
EPRINTF("Unexpected '%s' message", action_type_to_string(action_type));
HEX_DUMP("query_msg: ", MSG_IMG(query_msg), MSG_SIZE(query_msg));
ret = -EINVAL;
break;
}
out:
DPRINTF("ret=%li", ret);
return ret;
}
static long transport_ioctl_copy_from_user(ioctl_hdr_t *ioctl_hdr,
msg_varsized_t *query_msg, void __user *user_data)
{
long ret;
size_t msg_size;
msg_sized_t *msg;
msg_img_t *msg_img;
void *payload;
if (copy_from_user(ioctl_hdr, user_data, sizeof(ioctl_hdr_t))) {
EPRINTF("'copy_from_user()' failure");
ret = -EFAULT;
goto out;
}
msg_size = ioctl_hdr->size;
if (msg_size < sizeof(msg_img_t)) {
EPRINTF("message image is too small");
ret = -EINVAL;
goto out;
}
if (msg_size > TRANSPORT_MSG_SIZE_MAX) {
EPRINTF("size > TRANSPORT_MSG_SIZE_MAX");
ret = -E2BIG;
goto out;
}
msg = msg_varsized_init(query_msg, msg_size);
if (!msg) {
ret = -ENOMEM;
goto out;
}
msg_img = MSG_IMG(msg);
payload = (uint8_t *)user_data + sizeof(ioctl_hdr_t);
if (copy_from_user(msg_img, payload, msg_size)) {
msg_varsized_uninit(query_msg);
EPRINTF("'copy_from_user()' failure");
ret = -EFAULT;
goto out;
}
ret = 0;
out:
DPRINTF("ret=%li", ret);
return ret;
}
static long transport_ioctl_copy_to_user(ioctl_hdr_t *ioctl_hdr,
msg_sized_t *reply_msg, void __user *user_data)
{
long ret;
size_t msg_size = MSG_SIZE(reply_msg);
size_t capacity;
void *payload;
msg_img_t *msg_img;
ioctl_hdr->size = msg_size;
if (copy_to_user(user_data, ioctl_hdr, sizeof(ioctl_hdr_t))) {
EPRINTF("'copy_to_user()' failure");
ret = -EFAULT;
goto out;
}
capacity = ioctl_hdr->capacity;
if (capacity < msg_size) {
WPRINTF("capacity=%zu < msg_size=%zu", capacity, msg_size);
ret = -ENOSPC;
goto out;
}
payload = (uint8_t *)user_data + sizeof(ioctl_hdr_t);
msg_img = MSG_IMG(reply_msg);
if (copy_to_user(payload, msg_img, msg_size)) {
EPRINTF("'copy_to_user()' failure");
ret = -EFAULT;
goto out;
}
ret = 0;
out:
DPRINTF("ret=%li", ret);
return ret;
}
long transport_device_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
transport_t *transport = filp->private_data;
long ret;
if (transport->shutdown) {
ret = -EIO;
goto out;
}
switch (cmd) {
case IOCTL_WRITE_AND_READ_MSG:
case IOCTL_READ_VERSION:
{
ioctl_hdr_t ioctl_hdr;
void *user_data = (void *)arg;
msg_varsized_t query_msg;
ret = transport_ioctl_copy_from_user(&ioctl_hdr, &query_msg, user_data);
if (!ret) {
msg_varsized_t reply_msg;
ret = transport_ioctl_write_read_msg(transport, &reply_msg, MSG_VARSIZED_GET_SIZED(&query_msg));
if (!ret) {
ret = transport_ioctl_copy_to_user(&ioctl_hdr, MSG_VARSIZED_GET_SIZED(&reply_msg), user_data);
msg_varsized_uninit(&reply_msg);
}
msg_varsized_uninit(&query_msg);
}
break;
}
default:
EPRINTF("Unexpected IOCTL cmd=%u", cmd);
ret = -ENOIOCTLCMD;
}
out:
if (-EINVAL == ret) {
transport_shutdown(transport);
}
DPRINTF("ret=%li", ret);
return ret;
}
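// The ioctl protocol implied above: userspace places an 'ioctl_hdr_t' (with 'size' set to the
// query image size and 'capacity' to the space available for the reply) followed immediately by
// the query 'msg_img_t'; on success the same buffer comes back with the header's 'size' updated
// and the reply image in the payload area. A hedged userspace sketch; the device fd, buffer
// sizing and any fields of 'ioctl_hdr_t' beyond 'size'/'capacity' are assumptions:
#if 0
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>

static long write_read_msg_sketch(int dev_fd, const void *query_img, size_t query_size,
				  void *buf, size_t buf_size)
{
	ioctl_hdr_t *hdr = (ioctl_hdr_t *) buf;
	void *payload = (uint8_t *) buf + sizeof(ioctl_hdr_t);

	if (buf_size < sizeof(ioctl_hdr_t) + query_size)
		return -1;
	memset(hdr, 0, sizeof(*hdr));
	hdr->size = query_size;
	hdr->capacity = buf_size - sizeof(ioctl_hdr_t);   // room left for the reply image
	memcpy(payload, query_img, query_size);
	// On success the reply image of hdr->size bytes is available at 'payload'.
	return ioctl(dev_fd, IOCTL_WRITE_AND_READ_MSG, buf);
}
#endif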
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ssize_t transport_device_read(struct file *filp, char __user *user_data,
size_t size, loff_t *offset)
{
msg_t *msg;
transport_t *transport = filp->private_data;
size_t img_size;
ssize_t ret;
if (filp->f_flags & O_NONBLOCK) {
EPRINTF("'non-blocking' mode is not supported yet");
ret = -EINVAL;
transport_shutdown(transport);
goto out;
}
if (!size) {
EPRINTF("'empty read' is not supported");
ret = -EINVAL;
transport_shutdown(transport);
goto out;
}
task_info_map_delete_exited();
retry_wait:
// We may start with 'wait*()' because it itself starts
// with 'condition' check.
if (wait_event_interruptible_exclusive(transport->msg_wait_queue,
transport->shutdown
|| !ring_is_empty(&transport->msg_ring))) {
ret = -EINTR;
goto out;
}
// Lock the state and check if processing is actually possible.
spin_lock(&transport->msg_spinlock);
{
if (transport->shutdown) {
ret = -EIO;
spin_unlock(&transport->msg_spinlock);
goto out;
}
if (ring_is_empty(&transport->msg_ring)) {
WPRINTF("wakeup without messages");
spin_unlock(&transport->msg_spinlock);
goto retry_wait;
}
msg = *(msg_t **) ring_consumer_ptr(&transport->msg_ring);
img_size = msg->img_size;
DPRINTF("size=%zu img_size=%zu", size, img_size);
if (size < img_size) {
ret = -ENOSPC;
spin_unlock(&transport->msg_spinlock);
goto out;
}
ring_consumer_index_move_one(&transport->msg_ring);
}
spin_unlock(&transport->msg_spinlock);
// 'copy_to_user' MAY sleep (for example in page fault handler)
if (copy_to_user(user_data, &msg->img, img_size)) {
WPRINTF("'copy_to_user()' failure");
ret = -EFAULT;
transport_shutdown(transport);
} else {
ret = img_size;
}
msg_unref(msg);
out:
DPRINTF("ret=%zi", ret);
return ret;
}
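// When no shared data queue is mmap()ed, userspace consumes messages with plain blocking
// 'read()' calls, one message image per call; a buffer smaller than the pending image gets
// -ENOSPC. A minimal sketch; the buffer size and everything around the loop are assumptions:
#if 0
#include <stdint.h>
#include <unistd.h>

static void read_loop_sketch(int dev_fd)
{
	uint8_t buf[1 << 16];   // buffer size is an assumption, large enough for one image

	for (;;) {
		ssize_t n = read(dev_fd, buf, sizeof(buf));  // blocks until a message arrives
		if (n <= 0)
			break;   // errno is EIO after shutdown, EINTR on signal
		// 'buf' now holds one 'msg_img_t' image of 'n' bytes; dispatch it here.
	}
}
#endif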
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Forward declaration; the definition is at the end of this file
static long wait_msg_killable_timeout(msg_t* msg, unsigned long timeout_jiffies);
static void msg_wait_reply(msg_t *msg)
{
long ret;
// Do not block 'control' process
if (atomic_read(&msg->reply_wait_count)
&& MSG_TYPE(msg) != MT_PONG
&& transport_is_control_tgid(current->tgid)) {
WPRINTF_RATELIMITED("avoiding blocking wait in control process");
return;
}
// We may start with 'wait*()' because it itself starts
// with 'condition' check.
DPRINTF("waiting for userspace reply...");
ret = wait_msg_killable_timeout(msg, msecs_to_jiffies(TRANSPORT_WAIT_REPLY_TIMEOUT_MSECS));
if (!ret) {
// Timeout here means unexpected issue with userspace.
FPRINTF("timeout waiting for userspace reply (msg_type=%i/%s)",
MSG_TYPE(msg),
msg_type_to_string(MSG_TYPE(msg)));
HEX_DUMP("msg: ", MSG_IMG(msg), MSG_SIZE(msg));
dump_stack();
// identify and shut down the transport that failed to reply
transport_global_shutdown_msg(msg);
} else if (ret < 0) {
// Calling process has been interrupted as SIGKILL was received.
// In practice this means 'block'.
DPRINTF("message was interrupted...");
msg->interrupted = true;
} else {
// Userspace reply has been received (msg->reply_msg) or
// waiting has been explicitly aborted (msg->aborted) for
// example on userspace disconnect.
DPRINTF("wait finished (msg->block=%i)", msg->block);
}
}
static void msg_mark_async(msg_t *msg)
{
MSG_ID(msg) = 0;
MSG_REPLY(msg) = false;
}
void send_msg_async(msg_t *msg)
{
DPRINTF("msg=%p", msg);
msg_mark_async(msg);
send_msg_nowait(msg);
DPRINTF("");
}
void send_msg_async_unref(msg_t *msg)
{
if (msg) {
send_msg_async(msg);
msg_unref(msg);
}
}
static void msg_mark_sync(msg_t *msg)
{
MSG_ID(msg) = transport_global_sequence_next();
MSG_REPLY(msg) = false;
}
bool send_msg_sync_nowait(msg_t *msg)
{
bool sent;
DPRINTF("msg=%p", msg);
msg_mark_sync(msg);
sent = send_msg_nowait(msg);
DPRINTF("msg=%p sent=%i", msg, sent);
return sent;
}
void send_msg_sync(msg_t *msg)
{
DPRINTF("msg=%p", msg);
if (send_msg_sync_nowait(msg)) {
msg_wait_reply(msg);
}
DPRINTF("");
}
void send_msg_sync_unref(msg_t *msg)
{
if (msg) {
send_msg_sync(msg);
thread_safe_path_clear(&msg->path);
msg_unref(msg);
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static int transport_handle_ping_msg(transport_t *transport, msg_sized_t *ping)
{
int ret;
msg_t *pong;
bool sync;
if (ping->img_size < (sizeof(msg_img_t) + sizeof(ping_img_t))) {
DPRINTF("'ping' message is too short. ignoring it.");
ret = -EINVAL;
goto out;
}
pong = pong_msg_new(ping);
if (!pong) {
ret = -ENOMEM;
goto out;
}
// reflect ping's 'reply' policy
sync = !!MSG_ID(ping);
if (sync) {
msg_mark_sync(pong);
} else {
msg_mark_async(pong);
}
transport_send_msg_nowait(transport, pong);
if (sync) {
msg_wait_reply(pong);
}
msg_unref(pong);
ret = 0;
out:
return ret;
}
static int transport_handle_set_pid_status_msg(msg_sized_t *msg)
{
msg_img_t *msg_img;
pid_set_st_img_t *img;
pid_t pid;
task_status_t status;
int ret;
if (msg->img_size < (sizeof(msg_img_t) + sizeof(pid_set_st_img_t))) {
DPRINTF("'pid' message is too short. ignoring it.");
ret = -EINVAL;
goto out;
}
msg_img = MSG_IMG(msg);
img = IMG_PAYLOAD(msg_img);
pid = img->pid;
status = img->status;
ret = task_info_status_set(pid, status);
out:
DPRINTF("ret=%i", ret);
return ret;
}
static int transport_handle_del_pid_msg(msg_sized_t *msg)
{
msg_img_t *msg_img;
pid_del_img_t *img;
pid_t pid;
int ret;
if (msg->img_size < (sizeof(msg_img_t) + sizeof(pid_del_img_t))) {
DPRINTF("'pid' message is too short. ignoring it.");
ret = -EINVAL;
goto out;
}
msg_img = MSG_IMG(msg);
img = IMG_PAYLOAD(msg_img);
pid = img->pid;
ret = task_info_map_del(pid);
out:
DPRINTF("ret=%i", ret);
return ret;
}
static int transport_handle_enable_events(transport_t *transport, msg_sized_t *msg)
{
msg_img_t *msg_img;
events_mask_img_t *img;
uint64_t mask;
uint64_t old_mask;
int ret;
if (msg->img_size < (sizeof(msg_img_t) + sizeof(events_mask_img_t))) {
DPRINTF("'events' message is too short. ignoring it.");
ret = -EINVAL;
goto out;
}
msg_img = MSG_IMG(msg);
img = IMG_PAYLOAD(msg_img);
mask = img->events_mask;
spin_lock(&transport->msg_spinlock);
{
if (transport->shutdown) {
// Do not allow changing the mask when shutdown.
// Transport will not be able to receive any events.
ret = -EFAULT;
} else {
old_mask = atomic64_read(&transport->events_mask);
atomic64_set(&transport->events_mask, old_mask | mask);
ret = 0;
}
}
spin_unlock(&transport->msg_spinlock);
transport_global_recalculate_combined_event_mask();
out:
DPRINTF("ret=%i", ret);
return ret;
}
static int transport_handle_disable_events(transport_t *transport, msg_sized_t *msg)
{
msg_img_t *msg_img;
events_mask_img_t *img;
uint64_t mask;
uint64_t old_mask;
int ret;
if (msg->img_size < (sizeof(msg_img_t) + sizeof(events_mask_img_t))) {
DPRINTF("'events' message is too short. ignoring it.");
ret = -EINVAL;
goto out;
}
msg_img = MSG_IMG(msg);
img = IMG_PAYLOAD(msg_img);
mask = img->events_mask;
spin_lock(&transport->msg_spinlock);
{
if (transport->shutdown) {
// Do not allow changing the mask when shut down.
// It is not particularly useful, but keep the check anyway.
ret = -EFAULT;
} else {
old_mask = atomic64_read(&transport->events_mask);
atomic64_set(&transport->events_mask, old_mask & (~mask));
ret = 0;
}
}
spin_unlock(&transport->msg_spinlock);
transport_global_recalculate_combined_event_mask();
out:
DPRINTF("ret=%i", ret);
return ret;
}
// FIXME: do something with 'reply'. For example, merge several replies
// into one; link replies into a list; extract 'responses' and merge them.
static void handle_reply(msg_t *query_msg, msg_sized_t *reply_msg)
{
// handle 'long' 'reply'
size_t headers_size = sizeof(msg_img_t) + sizeof(reply_img_t);
// Note: for compatibility with legacy short 'reply_img_t' default 'reply_type' is RT_ALLOW
if (MSG_SIZE(reply_msg) >= headers_size) {
msg_img_t *reply_msg_img = MSG_IMG(reply_msg);
reply_img_t *reply_img = IMG_PAYLOAD(reply_msg_img);
reply_type_t reply_type = reply_img->type;
DPRINTF("MSG_SIZE(reply_msg)=%zu - headers_size=%zu = %zu reply_type=%u",
MSG_SIZE(reply_msg), headers_size,
MSG_SIZE(reply_msg) - headers_size, reply_type);
if (RT_BLOCK == reply_type) {
query_msg->block = true;
}
}
}
static int transport_handle_reply(transport_t *transport, msg_sized_t *reply)
{
int ret;
msg_id_t reply_id = MSG_ID(reply);
msg_type_t reply_type = MSG_TYPE(reply);
DPRINTF("");
// find 'query' matching this 'reply'
spin_lock(&transport->msg_spinlock);
{
void *item_ptr = set_begin_ptr(&transport->sent_msgs_set);
void *end_ptr = set_end_ptr(&transport->sent_msgs_set);
while (item_ptr < end_ptr) {
msg_t *query = *(msg_t **) item_ptr;
if (MSG_ID(query) == reply_id) {
// remove 'query' from 'set'
*(msg_t **) item_ptr = *(msg_t **) set_item_ptr(
&transport->sent_msgs_set,
set_items_count_dec(&transport->sent_msgs_set));
handle_reply(query, reply);
msg_reply_wait_count_dec(query);
ret = 0;
goto unlock;
}
item_ptr = set_ptr_next(&transport->sent_msgs_set, item_ptr);
}
WPRINTF("Unexpected 'reply' with type=%i id=%llX", reply_type, reply_id);
ret = -EINVAL;
}
unlock:
spin_unlock(&transport->msg_spinlock);
DPRINTF("ret=%i", ret);
return ret;
}
static int transport_handle_msg(transport_t *transport, msg_sized_t *msg)
{
int ret;
if (msg->img_size < sizeof(msg_img_t)) {
DPRINTF("message image is too small");
ret = -EINVAL;
goto out;
}
if (MSG_REPLY(msg)) {
ret = transport_handle_reply(transport, msg);
} else { // !reply
action_type_t type = MSG_TYPE(msg);
switch (type) {
case AT_PING:
ret = transport_handle_ping_msg(transport, msg);
break;
case AT_PID_SET_ST:
ret = transport_handle_set_pid_status_msg(msg);
break;
case AT_PID_DEL:
ret = transport_handle_del_pid_msg(msg);
break;
case AT_ENABLE_EVENTS:
ret = transport_handle_enable_events(transport, msg);
break;
case AT_DISABLE_EVENTS:
ret = transport_handle_disable_events(transport, msg);
break;
case AT_WAIT_SHARED_DATA_QUEUE:
ret = transport_data_queue_wait(transport);
break;
default:
WPRINTF("Unexpected message type=%i/%s", type, action_type_to_string(type));
ret = -EINVAL;
}
}
out:
DPRINTF("ret=%i", ret);
return ret;
}
ssize_t transport_device_write(struct file *filp, const char __user *user_data,
size_t size, loff_t *offset)
{
transport_t *transport = filp->private_data;
msg_varsized_t msg;
msg_sized_t* smsg;
msg_img_t *msg_img;
ssize_t ret;
if (transport->shutdown) {
ret = -EIO;
goto out;
}
if (filp->f_flags & O_NONBLOCK) {
EPRINTF("'non-blocking' mode is not supported yet");
ret = -EINVAL;
transport_shutdown(transport);
goto out;
}
if (!size) {
WPRINTF("'zero write' is not supported");
ret = -EINVAL;
transport_shutdown(transport);
goto out;
}
if (size > TRANSPORT_MSG_SIZE_MAX) {
WPRINTF("size > TRANSPORT_MSG_SIZE_MAX");
ret = -E2BIG;
goto out;
}
smsg = msg_varsized_init(&msg, size);
if (!smsg) {
ret = -ENOMEM;
goto out;
}
msg_img = MSG_IMG(smsg);
if (copy_from_user(msg_img, user_data, size)) {
EPRINTF("'copy_from_user()' failure");
ret = -EFAULT;
transport_shutdown(transport);
goto free_msg;
}
ret = transport_handle_msg(transport, smsg);
if (ret) {
// make sure error code is negative
if (ret > 0) {
EPRINTF("error code must be negative");
ret = -ret;
}
goto free_msg;
}
ret = size;
free_msg:
msg_varsized_uninit(&msg);
out:
DPRINTF("ret=%zi", ret);
return ret;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
/*
Warning: 'transport_open()' and 'transport_release()' may be
simultaneously invoked by several threads or processes.
Note: We can match different 'transport' instances using 'device'
'major'/'minor' from 'inode->i_rdev'. A pointer to the selected 'transport'
can be stored in 'filp->private_data' for later use in '*_read()',
'*_write()', etc.
Note: We may create 'transport' on 'first open' and destroy it on
'last close'.
*/
/*
There is a possibility of 'deadlock' between our 'kernel' and
'userspace' code while processing events generated by our own userspace
process, until that process has been registered in the 'ignore' list.
*/
int transport_device_open(struct inode *inode, struct file *filp)
{
transport_t *transport;
int ret;
DPRINTF("inode->i_rdev: major=%u minor=%u", imajor(inode), iminor(inode));
DPRINTF("filp->f_flags=%X", filp->f_flags);
if (filp->f_flags & O_NONBLOCK) {
EPRINTF("'non-blocking' mode is not supported yet");
ret = -EINVAL;
goto out;
}
mutex_lock(&transport_global.transport_mutex);
{
DPRINTF("transport_count=%u", transport_global.transport_count);
transport = transport_new();
if (!transport) {
WPRINTF("'%s()' failure", "transport_new");
ret = -ENOMEM;
goto unlock_open_close_mutex;
}
filp->private_data = transport;
if (!transport_global.transport_count) {
// FIXME: 'attach' may fail
IPRINTF("attaching interceptors");
ret = syscall_hooks_attach();
if (ret) {
EPRINTF("'%s()' failure %i", "syscall_hooks_attach", ret);
goto unlock_open_close_mutex;
}
ret = tracepoints_attach();
if (ret) {
EPRINTF("'%s()' failure %i", "tracepoints_attach", ret);
syscall_hooks_detach();
goto unlock_open_close_mutex;
}
IPRINTF("interceptors attached");
}
++transport_global.transport_count;
ret = 0;
}
unlock_open_close_mutex:
mutex_unlock(&transport_global.transport_mutex);
out:
DPRINTF("ret=%i", ret);
return ret;
}
// 'release()' means 'close()'
int transport_device_release(struct inode *inode, struct file *filp)
{
mutex_lock(&transport_global.transport_mutex);
{
transport_t *transport = filp->private_data;
transport_shutdown(transport);
transport_free(transport);
DPRINTF("transport_count=%u", transport_global.transport_count);
if (!--transport_global.transport_count) {
IPRINTF("detaching interceptors");
tracepoints_detach();
// FIXME: 'syscall_hooks_detach()' may fail
syscall_hooks_detach();
task_info_map_clear();
IPRINTF("interceptors detached");
}
}
mutex_unlock(&transport_global.transport_mutex);
return 0;
}
int transport_device_mmap(struct file *filp, struct vm_area_struct *vma)
{
int ret;
transport_t *transport = filp->private_data;
if (transport->shutdown) {
ret = -EIO;
goto out;
}
ret = transport_data_queue_mmap(transport, vma);
out:
return ret;
}
static long wait_msg_killable_timeout(msg_t* msg, unsigned long timeout_jiffies)
{
#ifndef HAVE_WAIT_EVENT_KILLABLE_TIMEOUT
// 'wait_event_interruptible_timeout' has to be a macro, as do 'TASK_KILLABLE' and 'TASK_INTERRUPTIBLE'.
// We need the functionality of 'wait_event_interruptible_timeout',
// but with 'TASK_INTERRUPTIBLE' replaced by 'TASK_KILLABLE', which
// is achieved by temporarily redefining 'TASK_INTERRUPTIBLE' (and 'signal_pending').
// If the trick cannot be applied, fall back to the regular 'wait_event_timeout'.
#if defined(TASK_KILLABLE) && defined(TASK_INTERRUPTIBLE) && defined(wait_event_interruptible_timeout) && !defined(signal_pending)
#undef TASK_INTERRUPTIBLE
#define TASK_INTERRUPTIBLE TASK_KILLABLE
#define signal_pending fatal_signal_pending
return wait_event_interruptible_timeout(msg->wait_queue,
!atomic_read(&msg->reply_wait_count),
timeout_jiffies);
#undef TASK_INTERRUPTIBLE
#undef signal_pending
#else
// Something unexpected is going on; fall back to the 'TASK_UNINTERRUPTIBLE' variant.
// This should not cause any issues as long as the APL
// daemon keeps responding to events.
return wait_event_timeout(msg->wait_queue,
!atomic_read(&msg->reply_wait_count),
timeout_jiffies);
#endif
#else
// Just use the well defined macros available.
return wait_event_killable_timeout(msg->wait_queue,
!atomic_read(&msg->reply_wait_count),
timeout_jiffies);
#endif
}