[OpenSIPS-Users] Problem with opensips core dumping on high load

Andreas Bachmann andreas at jibemobile.com
Thu Mar 12 16:09:45 CET 2015


Hello,

I noticed OpenSIPS core dumping in the latest version when under high load. The problem is an invalid pointer encountered while iterating over the in-memory user location table (db_mode is 0). Below is the backtrace of where this happens. Note: we added an extra NULL check on that line to make sure that _r is not NULL. But as you can see, it is still crashing, so we assume that the location table got corrupted – the pointer is not NULL but points into nirvana (invalid memory).
We are load testing only register cycles (REGISTER/401/REGISTER/200). The current config follows the trace; the important part is “route[register_request]”. Tests have shown that if we strip out the cachedb calls, or use the local cache instead of cache_mysql, this does not happen. Could it be a conflict in the timer procedure used by both modules? The cache needs to be distributed – that’s why cache_mysql was chosen.

Any thoughts?

Core was generated by `/opt/opensips/sbin/opensips -P /var/run/opensips.pid -w /opt/opensips -m 8192 -'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  nodb_timer (_r=0x7f8f00000000) at urecord.c:223
223             if (!_r->contacts) return 0;
(gdb) #0  nodb_timer (_r=0x7f8f00000000) at urecord.c:223
        ptr = <optimized out>
        t = <optimized out>
#1  timer_urecord (_r=_r at entry=0x7f8f00000000, ins_list=ins_list at entry=0x7f8fed2ec480) at urecord.c:367
No locals.
#2  0x00007f91eed428ac in mem_timer_udomain (_d=0x7f8fed2ec478) at udomain.c:791
        ptr = 0x7f8f00000000
        dest = <optimized out>
        i = 61
        ret = <optimized out>
        flush = 0
        it = {node = 0x7f8ff14136c8, map = 0x7f8fed2f0238}
        __FUNCTION__ = "mem_timer_udomain"
#3  0x00007f91eed356fe in synchronize_all_udomains () at dlist.c:694
        res = 0
        ptr = 0x7f8fed2ec418
#4  0x00007f91eed47ca7 in timer (ticks=<optimized out>, param=<optimized out>) at ul_mod.c:439
        __FUNCTION__ = "timer"
#5  0x00000000004df9c8 in timer_ticker (drift=<synthetic pointer>, timer_list=<optimized out>) at timer.c:384
        t = 0x7f91f1186da0
        j = 300
        ij = 300050000
        ij_marker = 300050000
#6  run_timer_process (tpl=0x7f91f11862e8, tpl=0x7f91f11862e8) at timer.c:506
        multiple = <optimized out>
        cnt = <optimized out>
        o_tv = <optimized out>
        tv = {tv_sec = 0, tv_usec = 0}
        drift = 0
        uinterval = 100000
        wait = <optimized out>
#7  start_timer_processes () at timer.c:616
        tpl = 0x7f91f11862e8
        pid = <optimized out>
        __FUNCTION__ = "start_timer_processes"
#8  0x00000000004170f9 in main_loop () at main.c:1016
        i = <optimized out>
        pid = <optimized out>
        si = 0x0
        startup_done = 0x0
        chd_rank = 64
        rc = <optimized out>
        load_p = 0x7f8fed2f67c8
#9  main (argc=<optimized out>, argv=<optimized out>) at main.c:1634
        cfg_log_stderr = <optimized out>
        cfg_stream = <optimized out>
        c = <optimized out>
        r = <optimized out>
        tmp = 0x7fff9dd62f17 ""
        tmp_len = <optimized out>
        port = <optimized out>
        proto = <optimized out>
        options = 0x5c59a8 "f:cCm:M:b:l:n:N:rRvdDFETSVhw:t:u:g:P:G:W:o:"
        ret = -1
        seed = 3178038181
        __FUNCTION__ = "main"




Configuration:

####### Global Parameters #########
# Core settings for this proxy: TCP tuning, TLS, core dumps, logging and the
# process model. Module-specific settings are in the modules section.

tcp_children=256
tcp_max_connections=500000
tcp_keepalive=1
tcp_keepcount=3
tcp_keepidle=300
tcp_keepinterval=300
tcp_connection_lifetime=1200
tcp_max_msg_chunks=20
tcp_max_msg_time=15
tcp_connect_timeout=1
tcp_send_timeout=1
#tcp_async=1

# NOTE(review): TLS is disabled here, so the tls_* settings below presumably
# have no effect in this deployment - confirm.
disable_tls=yes
tls_verify_server = 1
# There won't be a client presenting a real valid cert. If a client were to
# present a cert - even if not required (see below) - this option would make
# the client connection fail (see the OpenSIPS documentation).
tls_verify_client = 0
tls_require_client_certificate = 0
tls_method = TLSv1

# Core dumps are left enabled.
disable_core_dump=no

debug=0
server_header="Server: Jibe Mobile SIP Proxy 1.10.1"
log_stderror=no
log_facility=LOG_LOCAL0

fork=yes
children=64

auto_aliases=no

# Listen address is redacted in this posting.
listen=...

####### Modules Section ########
# Loads the modules used by the routing script and sets their parameters.

#set module path
mpath="/opt/opensips/lib64/opensips/modules/"

# helper and util modules
loadmodule "maxfwd.so"
loadmodule "db_mysql.so"
loadmodule "textops.so"
loadmodule "sipmsgops.so"

# ----- Management interface -----
loadmodule "mi_fifo.so"
modparam("mi_fifo", "fifo_name", "/tmp/opensips_fifo")
modparam("mi_fifo", "fifo_mode", 0666)

# core modules for sip routing and checking

# ----- Stateless signalling ----
loadmodule "sl.so"

# ----- Transaction Management -----
loadmodule "tm.so"

# NOTE: signaling.so requires sl/tm loaded before itself
loadmodule "signaling.so"

# set final reply timer for SIP messages in seconds
modparam("tm", "fr_timer", 32)
modparam("tm", "fr_inv_timer", 150)

# ----- Record Routing -----
loadmodule "rr.so"
modparam("rr", "append_fromtag", 1)
modparam("rr", "enable_double_rr", 1)

# Dialog module to track RTP proxy in use for the RTP session
loadmodule "dialog.so"

# ----- user location -----
loadmodule "usrloc.so"

modparam("usrloc", "db_mode", 0) # In memory only
# Interval of the usrloc timer routine (presumably in seconds - see the usrloc
# module documentation).
modparam("usrloc", "timer_interval", 300)
modparam("usrloc", "nat_bflag", "nat_branch")

# ----- Registrar ------
loadmodule "registrar.so"
modparam("registrar", "max_contacts", 1)
modparam("registrar", "default_expires", 3600)
modparam("registrar", "min_expires", 20)
modparam("registrar", "max_expires", 3600)
modparam("registrar", "tcp_persistent_flag", "tcp_persistent")
modparam("registrar", "mcontact_avp", "$avp(contact_info)")
modparam("registrar", "received_avp", "$avp(42)")

#GRUU support for SERV-2317: 1=disable is default, 0=enable
modparam("registrar", "disable_gruu", 1)
# ----- Authentication -----
loadmodule "auth.so"
loadmodule "auth_db.so"
modparam("auth_db", "calculate_ha1", yes)
modparam("auth_db", "password_column", "password")
modparam("auth_db", "db_url", "mysql://....")
modparam("auth_db", "load_credentials", "")

# ----- Database operations for AVP -----
# Three numbered db_url entries; the URLs are redacted in this posting.
loadmodule "avpops.so"
modparam("avpops", "db_url", "1 mysql://...")
modparam("avpops", "db_url", "2 mysql://...")
modparam("avpops", "db_url", "3 mysql://...")

# ----------------- setting module-specific parameters ---------------
# ----- mi_fifo params -----
# NOTE(review): this heading is empty - the mi_fifo parameters are set right
# after its loadmodule line above.

# ----- CacheDB binding for cache interface
# NOTE(review): the db_url values for cachedb_sql, uri and acc carry database
# credentials in clear text; since this config was posted publicly, consider
# rotating them.
loadmodule "cachedb_sql.so"
modparam("cachedb_sql", "db_url","mysql://opensips:opensipsrw@69.194.8.32/opensips")
modparam("cachedb_sql", "cache_clean_period",3600)

# ----- URI -----
loadmodule "uri.so"
modparam("uri", "use_uri_table", 0)
modparam("uri", "db_url", "mysql://opensips:opensipsrw@69.194.8.32/opensips")

# ----- Accounting -----
loadmodule "acc.so"
modparam("acc", "db_url", "mysql://opensips:opensipsrw@69.194.8.32/opensips")
modparam("acc", "db_flag", "DB_FLAG")
modparam("acc", "cdr_flag", "CDR_FLAG")
modparam("acc", "early_media", 1)
modparam("acc", "report_cancels", 1)
modparam("acc", "db_missed_flag", 2)
modparam("acc", "failed_transaction_flag", 3)
modparam("acc", "detect_direction", 0)

# ----- NAT Helper -----
loadmodule "nathelper.so"
modparam("nathelper", "natping_interval", 0)
modparam("nathelper", "ping_nated_only", 1) # Ping only clients behind NAT, set to 1
modparam("nathelper", "sipping_bflag", 7) # Changed from 8 to 7
# NOTE(review): " at " in the URI below is most likely the list archive's
# obfuscation of "@" - verify against the real config.
modparam("nathelper", "sipping_from", "sip:pinger at rcs.jibemobile.com")
modparam("nathelper", "received_avp", "$avp(42)")

# ----- STUN Binding based keep-alive - RFC 6223
loadmodule "stun.so"
modparam("stun", "primary_ip", "...")
modparam("stun", "primary_port", "5671")
modparam("stun", "alternate_ip", "127.0.0.1")
modparam("stun", "alternate_port", "6600")

# ----- RTPProxy -----
loadmodule "rtpproxy.so"
# Retry lost rtpproxy mgmt connections after 20 seconds
modparam("rtpproxy", "rtpproxy_disable_tout", 20)
# Wait 2 seconds after sending an rtp mgmt packet for a response (US is a long way away)
modparam("rtpproxy", "rtpproxy_tout", 2)
# Retry 10 times after a timeout
modparam("rtpproxy", "rtpproxy_retr", 10)


# One rtpproxy set (id 0) listing four UDP control sockets.
modparam("rtpproxy", "rtpproxy_sock", "0 == udp:69.194.11.206:7890 udp:69.194.11.207:7890 udp:69.194.11.208:7890 udp:69.194.11.209:7890")


####### Routing Logic ########

##############################
# main request routing logic #
##############################
# Main request route. Initialises the script variables used by the sub-routes,
# logs the request, runs the sanity check and load-balancer hook, then
# dispatches in this order: in-dialog (has To-tag), REGISTER, CANCEL,
# P-Request-Source == hub / server / proxy, and finally client-originated
# requests. Most of the sub-routes called here are not part of this excerpt.
route {
# Script variables; several URI/address values are redacted in this posting.
$var(hub) = NULL;
$var(local_location) = "sip:.,;transport=tcp";
$var(imas) = "sip:...;transport=udp";
$var(rls) = "sip:...;transport=udp";
$var(mps) = "sip:...;transport=udp";
$var(otherCloud) = NULL;
# NOTE(review): " at " below is most likely the list archive's obfuscation of
# "@" - verify against the real config.
$var(conference_factory) = "sip:conference at jibemobile.com";
$var(network_wifi) = "..."; # wifi
$var(network_mobile) = "..."; # mobile

#COM-97
$var(blackBird) = "true";
xlog("JIBE CN ($ci): Incoming request: $rm: $ruri from: $si:$sp on $pr (source=$hdr(P-Request-Source))\r\n");
route(sanity_check);
route(receive_lb_message);
# Not sure if this is the right place to trigger accounting
# aba: !!! NO IT'S NOT !!!
if (is_method("INVITE")) {
route(db_cdr_accounting);
}

# Dispatch: the first matching branch handles the request.
if ( has_totag() ) {
route(in_dialog_request);
} else if ( is_method("REGISTER") ) {
route(register_request);
} else if ( is_method("CANCEL") ) {
route(cancel_request);
} else if ( $hdr(P-Request-Source) == "hub" ) {
# check if we are on the right proxy - if not, send request to the right one
route(check_hub_request);
   # hit target proxy - route to client
route(terminating_hub_request);
} else if ($hdr(P-Request-Source) == "server") {
route(check_server_request);
# Presence/XCAP SUBSCRIBEs go to the presence server route and stop here.
if(is_method("SUBSCRIBE") && ($hdr(Event) =~ "presence" || $hdr(Event) =~ "xcap") ) {
route(presence_server);
exit;
}
# NOTE(review): the next comment says "terminating hub" but the route invoked
# is originating_server_request - confirm which is intended.
# terminating hub routing - do media proxying and traffic shaping
route(originating_server_request);
} else if ( $hdr(P-Request-Source) == "proxy") {
# terminating proxy routing - do media proxying and traffic shaping
route(terminating_proxy_request);
} else {
# Client requests
# set avp to mark the transaction as client originated
# avps are bound to the current transaction/message and will be available
# in onreply for this transaction/message
$avp(client_originating_request) = "yes";
if ( is_method("OPTIONS") ) {
route(options_request);
} else if (is_method("MESSAGE") && $cT =~ "system-request") {
#originating system request SCA-5684
route(system_message_request);
} else {
# originating client request - traffic shaping, lookup of target node and media proxying
route(originating_client_request);
}
}
}

##########################################################################################
# SANITY CHECK ON INCOMING MESSAGES #
##########################################################################################
# Sanity check run on every incoming request: when the Max-Forwards processing
# reports failure, reply "483 Too Many Hops" statelessly and stop the script.
# The "10" argument is presumably the Max-Forwards value added when the header
# is missing - see the maxfwd module documentation.
route[sanity_check] {
xlog("JIBE CN ($ci): Sanity check on incoming request\r\n");
if ( !mf_process_maxfwd_header("10") ) {
sl_send_reply("483","Too Many Hops");
exit;
}
}

#########################################################################################
# REGISTER request #
##########################################################################################
# REGISTER handling. Steps, in order:
#   1. digest-authenticate against the "subscriber" table (challenge and exit
#      when not authorized);
#   2. build a keep-alive hint (J-Via reply header) from the top Via header;
#   3. add P-Associated-URI to the reply, set the TCP persistence flag, apply
#      NAT fixes;
#   4. save() the contact in the "location" table (this sends the reply);
#   5. log the shared-cache bookkeeping (the cache calls themselves are
#      commented out in this posted version);
#   6. tag the request with P-Network-Mode and hand it to third_party_register.
route[register_request] {
# authenticate the REGISTER requests
if (!www_authorize("rcs.jibemobile.com", "subscriber")) {
xlog("JIBE CN ($ci): Not authenticated - challenging user\r\n");
www_challenge("rcs.jibemobile.com", "0");
exit;
}

# USER authenticated - continue with setting keep alive values
#For keep-alive negotiation RFC-6223
# $avp(1) holds the first Via header; each avp_subst below writes a copy with
# "keep" replaced by "keep=<value>" into $avp(2).
$avp(1) = $(hdr(Via)[0]) ;
#If Keep header exists in via[0], initiate negotiation
if ($avp(1) =~ "keep") {
#Different keep alive value if network access info. is provided
if ($hdr(P-Access-Network-Info)) {
#Mobile Network
if ($hdr(P-Access-Network-Info) =~ "3GPP-GERAN" ||
$hdr(P-Access-Network-Info) =~ "3GPP-UTRAN-TDD" ||
$hdr(P-Access-Network-Info) =~ "3GPP-E-UTRAN-TDD" ||
$hdr(P-Access-Network-Info) =~ "3GPP2-1X" ||
$hdr(P-Access-Network-Info) =~ "3GPP2-1X-HRPD") {
avp_subst("$avp(1)/$avp(2)","/keep/keep=240/ig");
} else if ($hdr(P-Access-Network-Info) =~ "IEEE-802.16e") {
#Wi-Max Network
avp_subst("$avp(1)/$avp(2)","/keep/keep=10/ig");
} else if ($hdr(P-Access-Network-Info) =~ "IEEE-802.11" || $hdr(P-Access-Network-Info) =~ "ethernet") {
#Ethernet Network
avp_subst("$avp(1)/$avp(2)","/keep/keep=240/ig");
} else {
#Unknown
avp_subst("$avp(1)/$avp(2)","/keep/keep=0/ig");
}
} else if($var(network_mobile) != $var(network_wifi)) {
#No Access-Network-Info header, look at interface of which the request came in from
# The compared addresses are redacted in this posting; both branches set 240.
# NOTE(review): if neither address matches, $avp(2) is not assigned in this
# route, yet the J-Via header below is still appended - confirm.
if(dst_ip == ...) {
avp_subst("$avp(1)/$avp(2)","/keep/keep=240/ig");
} else if(dst_ip == ...) {
avp_subst("$avp(1)/$avp(2)","/keep/keep=240/ig");
}
} else {
#Let client make the call since network is not identified
# NOTE(review): the comment above says the client decides, but a fixed
# keep=240 is written here (the "Unknown" branch earlier uses keep=0) - confirm.
avp_subst("$avp(1)/$avp(2)","/keep/keep=240/ig");
}
#We can't trap REGISTER request's response object, so adding a new header instead
        append_to_reply("J-Via: $avp(2)\r\n");
}

#SERV-1071: Add P-Associated-URI to successful REGISTER responses for Samsung
# NOTE(review): " at " in the header value below is most likely the list
# archive's obfuscation of "@" - verify against the real config.
append_to_reply("P-Associated-URI: <tel:$fU>, <sip:$fU at rcs.jibemobile.com>\r\n");

# IanB - ensure client TCP connections stay open
if (proto==TCP) {
# Keep TCP/TLS connections open until the registration
# expires, by setting the tcp_persistent_flag
setflag(tcp_persistent);
}

# TODO - check if this really required
force_rport();

# NAT-fix the registration unless the User-Agent matches "SRG".
if ($hdr(User-Agent) !~ "SRG") {
xlog("L_INFO", "JIBE CN ($ci): NAT Fixing Register.\n");
fix_nated_register();
}

#######################################################################
# 200 OK or Error responses gets sent with save() action
# Note: Any append_to_reply call will be too late after this point!
#######################################################################
# IanB - 'f' to force save as part of single registration configuration
if ( !save("location", "f") ) {
sl_reply_error();
exit;
}

# After saving: a contact still present means "store", none means "remove".
if (registered("location", "$fu")) {
# store location for the user in the share location cache
xlog("JIBE CN ($ci): storing proxy: $var(local_location) for $tU\r\n");
        xlog("JIBE CN ($ci): contact expire=$ct.fields(expires) and hdr(Expires) = $hdr(Expires)\r\n");
#
# If expire header/contact param exists then specify cache timeout
#
# Prefer the Contact "expires" parameter; fall back to a non-zero Expires header.
$avp(expireValue) = $ct.fields(expires);
if ($avp(expireValue) == NULL) {
   if( is_present_hf("Expires") && $(hdr(Expires){s.int}) != 0) {
       $avp(expireValue) = $(hdr(Expires){s.int});
   }
}

# NOTE(review): both branches are empty in this posted version because the
# cache_store calls are commented out.
if ($avp(expireValue) != NULL) {
   #cache_store("sql","proxy_$tU","$var(local_location)", $(avp(expireValue){s.int}));
} else {
   #cache_store("sql","proxy_$tU","$var(local_location)");
}

} else {
# remove store location for the user in the share location cache
# NOTE(review): this logs $var(proxy_location), which is not assigned anywhere
# in the visible config (the store branch logs $var(local_location)) - verify.
xlog("JIBE CN ($ci): removing proxy: $var(proxy_location) for $tU\r\n");
#cache_remove("sql", "proxy_$tU");
}

# SERV-783 MSRPS based on bearer
# The reply has already been sent by save(); the append_hf calls below
# presumably target the request handed to third_party_register - confirm.
xlog("JIBE CN ($ci): Checking for bearer from P-Access-Network-Info header or network interface came through\n");
if ($hdr(P-Access-Network-Info)) {
#Mobile Network
if ($hdr(P-Access-Network-Info) =~ "3GPP") {
 xlog("JIBE CN ($ci): P-Access-Network-Info header: 3GPP detected\n");
 append_hf("P-Network-Mode: Mobile\r\n");
} else if ($hdr(P-Access-Network-Info) =~ "IEEE-802" || $hdr(P-Access-Network-Info) =~ "ethernet") {
 xlog("JIBE CN ($ci): P-Access-Network-Info header: IEEE-802 or ethernet detected\n");
 append_hf("P-Network-Mode: Wifi\r\n");
} else {
 xlog("JIBE CN ($ci): P-Access-Network-Info header: unknown detected\n");
 #Unknown?
}
} else if($var(network_mobile) != $var(network_wifi)) {
xlog("JIBE CN ($ci): No P-Access-Network-Info header found\nChecking interface received from...\n");
#No Access-Network-Info header, look at interface of which the request came in from
# The mobile and wireless interface addresses are redacted in this posting.
if(dst_ip == 127.0.0.1) {
 xlog("JIBE CN ($ci): message received on loopback interface\n");
} else if(dst_ip == ....) {
 xlog("JIBE CN ($ci): message received on mobile interface\n");
 append_hf("P-Network-Mode: Mobile\r\n");
} else if(dst_ip == ...) {
 xlog("JIBE CN ($ci): message received on wireless interface\n");
 append_hf("P-Network-Mode: Wifi\r\n");
}
}

route(third_party_register);
exit;
}




-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.opensips.org/pipermail/users/attachments/20150312/60f67849/attachment-0001.htm>


More information about the Users mailing list