Commit 891ed80a authored by Rainer Jung's avatar Rainer Jung Committed by ǝɹʇʇɐʃǝ◖ xıʃǝɟ
Browse files

Replace select() by poll().

select() has a well-known limit on the maximum file descriptor
that can be used (FD_SETSIZE). On Linux it is in most cases 1024.

Our renderd uses a lot of planet tiff files, so it already had
more than 1600 FDs open for tiff files. That means any new FD gets a
number bigger than that, and using such FDs in select() leads to crashes
(because select() uses a fixed-size bitmap).

The same can happen in mod_tile if the surrounding web server is very
busy, although it is less likely to happen there.

Changes in src/mod_tile.c:
- include poll.h
- poll timeout is an int containing milliseconds
- use "s > 0" instead of "s == 1" as success
  (although it should never be >1)
- different log message for timeout and error case

Changes in src/daemon.c:
- remove include for sys/select.h
- replace connections array by new array pfd used in poll()
- include special FDs (exit and listen) at index 0 and 1 in this array
- no longer reorganize array while iterating through it.
  That would have led to items being skipped.
  Instead mark array slots of closed connections with
  a negative FD. These are automatically skipped by poll.
  Reuse these slots later when new connections come in.
- replace num_connections with num_cslots (length of the initial
  segment of the array that poll() should check) and
  num_conns (number of slots in this segment that are
  actually used by connections; this number is only
  used in logging).
- slightly enhance debug log lines
parent acb11808
......@@ -24,7 +24,6 @@
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/select.h>
#include <sys/stat.h>
#include <sys/un.h>
#include <poll.h>
......@@ -49,6 +48,10 @@
#define PIDFILE "/run/renderd/renderd.pid"
#define PFD_LISTEN 0
#define PFD_EXIT_PIPE 1
#define PFD_SPECIAL_COUNT 2
#ifndef MAIN_ALREADY_DEFINED
static pthread_t *render_threads;
static pthread_t *slave_threads;
......@@ -187,12 +190,13 @@ void request_exit(void)
void process_loop(int listen_fd)
{
int num_connections = 0;
int connections[MAX_CONNECTIONS];
int num_cslots = 0;
int num_conns = 0;
int pipefds[2];
int exit_pipe_read;
struct pollfd pfd[MAX_CONNECTIONS + 2];
bzero(connections, sizeof(connections));
bzero(pfd, sizeof(pfd));
// A pipe is used to allow the render threads to request an exit by the main process
if (pipe(pipefds)) {
......@@ -203,56 +207,70 @@ void process_loop(int listen_fd)
exit_pipe_fd = pipefds[1];
exit_pipe_read = pipefds[0];
pfd[PFD_LISTEN].fd = listen_fd;
pfd[PFD_LISTEN].events = POLLIN;
pfd[PFD_EXIT_PIPE].fd = exit_pipe_read;
pfd[PFD_EXIT_PIPE].events = POLLIN;
while (1) {
struct sockaddr_un in_addr;
socklen_t in_addrlen = sizeof(in_addr);
fd_set rd;
int incoming, num, nfds, i;
FD_ZERO(&rd);
FD_SET(listen_fd, &rd);
nfds = listen_fd + 1;
for (i = 0; i < num_connections; i++) {
FD_SET(connections[i], &rd);
nfds = MAX(nfds, connections[i] + 1);
}
int incoming, num, i;
FD_SET(exit_pipe_read, &rd);
nfds = MAX(nfds, exit_pipe_read + 1);
num = select(nfds, &rd, NULL, NULL, NULL);
// timeout -1 means infinite timeout,
// a value of 0 would return immediately
num = poll(pfd, num_cslots + PFD_SPECIAL_COUNT, -1);
if (num == -1) {
perror("select()");
perror("poll()");
} else if (num) {
if (FD_ISSET(exit_pipe_read, &rd)) {
if (pfd[PFD_EXIT_PIPE].revents & POLLIN) {
// A render thread wants us to exit
break;
}
//printf("Data is available now on %d fds\n", num);
if (FD_ISSET(listen_fd, &rd)) {
num--;
if (pfd[PFD_LISTEN].revents & POLLIN) {
incoming = accept(listen_fd, (struct sockaddr *) &in_addr, &in_addrlen);
if (incoming < 0) {
perror("accept()");
} else {
if (num_connections == MAX_CONNECTIONS) {
syslog(LOG_WARNING, "Connection limit(%d) reached. Dropping connection\n", MAX_CONNECTIONS);
close(incoming);
} else {
connections[num_connections++] = incoming;
syslog(LOG_DEBUG, "DEBUG: Got incoming connection, fd %d, number %d\n", incoming, num_connections);
int add = 0;
// Search for unused slot
for (i = 0; i < num_cslots; i++) {
if (pfd[i + PFD_SPECIAL_COUNT].fd < 0) {
add = 1;
break;
}
}
// No unused slot found, add at end if space available
if (!add) {
if (num_cslots == MAX_CONNECTIONS) {
syslog(LOG_WARNING, "Connection limit(%d) reached. Dropping connection\n", MAX_CONNECTIONS);
close(incoming);
} else {
i = num_cslots;
add = 1;
num_cslots++;
}
}
if (add) {
pfd[i + PFD_SPECIAL_COUNT].fd = incoming;
pfd[i + PFD_SPECIAL_COUNT].events = POLLIN;
num_conns ++;
syslog(LOG_DEBUG, "DEBUG: Got incoming connection, fd %d, number %d, total conns %d, total slots %d\n", incoming, i, num_conns, num_cslots);
}
}
}
for (i = 0; num && (i < num_connections); i++) {
int fd = connections[i];
for (i = 0; num && (i < num_cslots); i++) {
int fd = pfd[i + PFD_SPECIAL_COUNT].fd;
if (FD_ISSET(fd, &rd)) {
if (fd >= 0 && pfd[i + PFD_SPECIAL_COUNT].revents & POLLIN) {
struct protocol cmd;
int ret = 0;
memset(&cmd, 0, sizeof(cmd));
......@@ -261,17 +279,11 @@ void process_loop(int listen_fd)
ret = recv_cmd(&cmd, fd, 0);
if (ret < 1) {
int j;
num_connections--;
syslog(LOG_DEBUG, "DEBUG: Connection %d, fd %d closed, now %d left\n", i, fd, num_connections);
for (j = i; j < num_connections; j++) {
connections[j] = connections[j + 1];
}
num_conns--;
syslog(LOG_DEBUG, "DEBUG: Connection %d, fd %d closed, now %d left, total slots %d\n", i, fd, num_conns, num_cslots);
request_queue_clear_requests_by_fd(render_request_queue, fd);
close(fd);
pfd[i + PFD_SPECIAL_COUNT].fd = -1;
} else {
enum protoCmd rsp = rx_request(&cmd, fd);
......@@ -284,7 +296,7 @@ void process_loop(int listen_fd)
}
}
} else {
syslog(LOG_ERR, "Select timeout");
syslog(LOG_ERR, "poll timeout");
}
}
}
......
......@@ -62,6 +62,7 @@ module AP_MODULE_DECLARE_DATA tile_module;
#include <arpa/inet.h>
#include <netdb.h>
#include <inttypes.h>
#include <poll.h>
#include "gen_tile.h"
......@@ -303,16 +304,16 @@ static int request_tile(request_rec *r, struct protocol *cmd, int renderImmediat
} while (retry--);
if (renderImmediately) {
struct timeval tv = {(renderImmediately > 2 ? scfg->request_timeout_priority : scfg->request_timeout), 0 };
fd_set rx;
int timeout = (renderImmediately > 2 ? scfg->request_timeout_priority : scfg->request_timeout);
struct pollfd rx;
int s;
while (1) {
FD_ZERO(&rx);
FD_SET(fd, &rx);
s = select(fd + 1, &rx, NULL, NULL, &tv);
rx.fd = fd;
rx.events = POLLIN;
s = poll(&rx, 1, timeout * 1000);
if (s == 1) {
if (s > 0) {
bzero(&resp, sizeof(struct protocol));
ret = recv(fd, &resp, sizeof(struct protocol_v2), 0);
......@@ -339,11 +340,17 @@ static int request_tile(request_rec *r, struct protocol *cmd, int renderImmediat
"Response does not match request: xml(%s,%s) z(%d,%d) x(%d,%d) y(%d,%d)", cmd->xmlname,
resp.xmlname, cmd->z, resp.z, cmd->x, resp.x, cmd->y, resp.y);
}
} else {
} else if (s == 0) {
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
"request_tile: Request xml(%s) z(%d) x(%d) y(%d) could not be rendered in %i seconds",
cmd->xmlname, cmd->z, cmd->x, cmd->y,
(renderImmediately > 1 ? scfg->request_timeout_priority : scfg->request_timeout));
timeout);
break;
} else {
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
"request_tile: Request xml(%s) z(%d) x(%d) y(%d) timeout %i seconds failed with reason: %s",
cmd->xmlname, cmd->z, cmd->x, cmd->y,
timeout, strerror(errno));
break;
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment