简体   繁体   中英

Why does my PostgreSQL C function crash?

We have two legacy aggregate functions which concatenate text values, delimited by a bar ( | ) or a comma ( , ). These functions are essentially very slow implementations of string_agg() , which appeared in PostgreSQL 9.0.

In order to obtain the performance of string_agg() without having to modify code which uses our bar_list() or comma_list() functions, I am attempting to create C implementations for those functions which basically ape string_agg() .

Unfortunately, I am NOT a C programmer.

Regardless, after finding the string_agg() source in varlena.c in the postgresql source, I set about reusing that code:

#include "postgres.h"                                 
#include <ctype.h>
#include <syslog.h>
#include <sys/types.h>
#include <unistd.h>
#include "lib/stringinfo.h"
#include "fmgr.h"
#include "utils/builtins.h"

/*
 * PostgreSQL module magic block (see "C-Language Functions" in the
 * PostgreSQL manual).  It is what produces the Pg_magic_func and
 * Pg_magic_data symbols seen in the compiled shared library.
 */
PG_MODULE_MAGIC;

/*
Compile this module in the following way:

   gcc -I $(pg_config --includedir-server) -fpic -c vs_funcs.c;
   gcc -shared -o $(pg_config --pkglibdir)/vs_funcs.so vs_funcs.o;

This will also place the compiled module in the correct location.
*/

// Code below is taken directly, or modified, from varlena.c in the PostgreSQL source

/*
 * appendStringInfoText
 *
 * Copy the payload bytes of the text value t onto the end of str.
 *
 * Gives the same result as appendStringInfoString(str, text_to_cstring(t)),
 * but hands the bytes straight to appendBinaryStringInfo instead of first
 * converting t to a C string.
 */
static void
appendStringInfoText(StringInfo str, const text *t)
{
    const char *payload = VARDATA_ANY(t);
    int         payload_len = VARSIZE_ANY_EXHDR(t);

    appendBinaryStringInfo(str, payload, payload_len);
}

/*
 * MAKE_STR_AGG(AGG_NAME, AGG_DELIM)
 *
 * Generates the support functions of a text-concatenating aggregate.
 * The code is largely copied from string_agg() in varlena.c, except that
 * the delimiter is fixed at compile time instead of being an SQL argument.
 *
 * Parameters:
 *   AGG_NAME  - identifier used as the prefix of the generated functions.
 *   AGG_DELIM - the delimiter, written as a character constant (e.g. '|').
 *
 * Generated definitions:
 *   make_<AGG_NAME>_state(fcinfo)
 *       static helper; creates the empty StringInfo accumulator inside the
 *       memory context reported by AggCheckCallContext().  Raises an ERROR
 *       when the call does not come from an aggregate.
 *   <AGG_NAME>_transfn(state internal, value text) RETURNS internal
 *       transition function.  A NULL value leaves the state untouched; the
 *       first non-NULL value creates the state; every later non-NULL value
 *       is preceded by AGG_DELIM.
 *   <AGG_NAME>_finalfn(state internal) RETURNS text
 *       final function.  Returns SQL NULL when no state was ever created
 *       (no non-NULL input), otherwise the accumulated string as text.
 *
 * Only block comments may be used inside the macro body: a line comment
 * followed by a continuation backslash would swallow the next line.
 */
#define MAKE_STR_AGG(AGG_NAME, AGG_DELIM) \
        static StringInfo \
        make_ ## AGG_NAME ## _state(FunctionCallInfo fcinfo) \
        { \
            StringInfo  state; \
            MemoryContext aggcontext; \
            MemoryContext oldcontext; \
            if (!AggCheckCallContext(fcinfo, &aggcontext)) \
            { \
                /* stringize outside the literal so the real name is reported */ \
                elog(ERROR, #AGG_NAME " called in non-aggregate context"); \
            } \
            /* build the accumulator in the aggregate's context, then switch back */ \
            oldcontext = MemoryContextSwitchTo(aggcontext); \
            state = makeStringInfo(); \
            MemoryContextSwitchTo(oldcontext); \
            return state; \
        } \
        PG_FUNCTION_INFO_V1(AGG_NAME ## _transfn); \
        Datum \
        AGG_NAME ## _transfn(PG_FUNCTION_ARGS) \
        { \
            StringInfo  state; \
            state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); \
            /* argument 1 is the value to append; NULL values are skipped */ \
            if (!PG_ARGISNULL(1)) \
            { \
                if (state == NULL) \
                    state = make_ ## AGG_NAME ## _state(fcinfo); \
                else \
                    appendStringInfoChar(state, AGG_DELIM); \
                appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); \
            } \
            /* the state is never modified in place by the caller; hand it back */ \
            PG_RETURN_POINTER(state); \
        } \
        PG_FUNCTION_INFO_V1(AGG_NAME ## _finalfn); \
        Datum \
        AGG_NAME ## _finalfn(PG_FUNCTION_ARGS) \
        { \
            StringInfo  state; \
            Assert(AggCheckCallContext(fcinfo, NULL)); \
            state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); \
            if (state == NULL) \
                PG_RETURN_NULL(); \
            /* the length is already tracked, so avoid a strlen() over the buffer */ \
            PG_RETURN_TEXT_P(cstring_to_text_with_len(state->data, state->len)); \
        }

/*
 * bar_list - Concatenates values and returns string.
 *
 * Syntax: bar_list(value text) RETURNS text
 *
 * Instantiates MAKE_STR_AGG with the name prefix bar_list and the
 * delimiter '|'.  The expansion defines make_bar_list_state(),
 * bar_list_transfn() and bar_list_finalfn().
 *
 * Note: Any NULL values are ignored. No delimiter is written before the
 * first value; on subsequent calls the delimiter is meant to precede the
 * associated value.
 */
MAKE_STR_AGG(bar_list, '|');

/*
 * comma_list - Concatenates values and returns string.
 *
 * Syntax: comma_list(value text) RETURNS text
 *
 * Instantiates MAKE_STR_AGG with the name prefix comma_list and the
 * delimiter ','.  The expansion defines make_comma_list_state(),
 * comma_list_transfn() and comma_list_finalfn().
 *
 * Note: Any NULL values are ignored. No delimiter is written before the
 * first value; on subsequent calls the delimiter is meant to precede the
 * associated value.
 */
MAKE_STR_AGG(comma_list, ',');

When I compile this, it appears to complete successfully, with no errors:

vezult@cruella-devel:~/Development/VistaShare/oct_git/utilities$ make
gcc -fpic -c -I/usr/include/postgresql/9.0/server vs_funcs.c
gcc -shared -o vs_funcs.so vs_funcs.o;

symbols in the resulting lib:

vezult@cruella-devel:~/Development/VistaShare/oct_git/utilities$ nm vs_funcs.so 
                 U AggCheckCallContext
                 U CurrentMemoryContext
00000000000009dc t MemoryContextSwitchTo
0000000000000ec0 r Pg_magic_data.8975
0000000000000a06 T Pg_magic_func
0000000000201138 a _DYNAMIC
00000000002012e8 a _GLOBAL_OFFSET_TABLE_
                 w _Jv_RegisterClasses
0000000000201118 d __CTOR_END__
0000000000201110 d __CTOR_LIST__
0000000000201128 d __DTOR_END__
0000000000201120 d __DTOR_LIST__
0000000000001108 r __FRAME_END__
0000000000201130 d __JCR_END__
0000000000201130 d __JCR_LIST__
0000000000201348 A __bss_start
                 w __cxa_finalize@@GLIBC_2.2.5
0000000000000de0 t __do_global_ctors_aux
0000000000000930 t __do_global_dtors_aux
0000000000201340 d __dso_handle
0000000000000ea0 r __func__.9026
0000000000000e80 r __func__.9096
                 w __gmon_start__
0000000000201348 A _edata
0000000000201358 A _end
0000000000000e18 T _fini
0000000000000868 T _init
                 U appendBinaryStringInfo
0000000000000a13 t appendStringInfoText
0000000000000bec T bar_list_finalfn
0000000000000b34 T bar_list_transfn
0000000000000910 t call_gmon_start
0000000000000d82 T comma_list_finalfn
0000000000000cca T comma_list_transfn
0000000000201348 b completed.6341
                 U cstring_to_text
0000000000201350 b dtor_idx.6343
                 U elog_finish
                 U elog_start
00000000000009b0 t frame_dummy
                 U makeStringInfo
0000000000000aab t make_bar_list_state
0000000000000c41 t make_comma_list_state
0000000000000e9c r my_finfo.9039
0000000000000e98 r my_finfo.9072
0000000000000e78 r my_finfo.9109
0000000000000e74 r my_finfo.9142
                 U pg_detoast_datum_packed
0000000000000bdf T pg_finfo_bar_list_finalfn
0000000000000b27 T pg_finfo_bar_list_transfn
0000000000000d75 T pg_finfo_comma_list_finalfn
0000000000000cbd T pg_finfo_comma_list_transfn

I create the function in postgres:

CREATE FUNCTION bar_list_transfn(text, text) RETURNS text AS 'vs_funcs', 'bar_list_transfn' language 'C';

And when I run it, it crashes postgres with a segmentation fault:

2011-09-09 10:02:15 EDT LOG:  server process (PID 15881) was terminated by signal 11: Segmentation fault

So, it seems to me that my code should work very similarly to the built-in function, so I'm not sure exactly why it doesn't work. I quite honestly don't know whether the compile-time warnings are significant, or if the real problem is elsewhere.

What am I doing wrong?

It looks like your SQL code is using the wrong types. From the source code ( src/include/catalog/pg_proc.h ):

DATA(insert OID = 3535 (  string_agg_transfn        PGNSP PGUID 12 1 0 0 0 f f f f f i 3 0 2281 "2281 25 25" _null_ _null_ _null_ _null_ string_agg_transfn _null_ _null_ _null_ ));
DESCR("aggregate transition function");
DATA(insert OID = 3536 (  string_agg_finalfn        PGNSP PGUID 12 1 0 0 0 f f f f f i 1 0 25 "2281" _null_ _null_ _null_ _null_ string_agg_finalfn _null_ _null_ _null_ ));
DESCR("aggregate final function");
DATA(insert OID = 3538 (  string_agg                PGNSP PGUID 12 1 0 0 0 t f f f f i 2 0 25 "25 25" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
DESCR("concatenate aggregate input into a string");

In particular:

... string_agg_transfn ... 2281 "2281 25 25" ...
... string_agg_finalfn ... 25   "2281"       ...
... string_agg         ... 25   "25 25"      ...

The first OID is the return type. The subsequent OIDs are argument types. These OIDs can be decoded by querying the catalog:

joey=# SELECT oid, typname FROM pg_type WHERE oid IN (25, 2281);
 oid  | typname  
------+----------
   25 | text
 2281 | internal
(2 rows)

Thus, the correct SQL statements should be:

-- Transition function: takes the running state (internal) and the next
-- text item, and returns the updated state (internal).  This mirrors the
-- catalog entry of string_agg_transfn, minus its delimiter argument.
CREATE FUNCTION bar_list_transfn(internal, text)
RETURNS internal
AS 'vs_funcs', 'bar_list_transfn'
LANGUAGE 'C';

-- Final function: takes the accumulated state (internal) and returns the
-- result as text, like the catalog entry of string_agg_finalfn.
CREATE FUNCTION bar_list_finalfn(internal)
RETURNS text
AS 'vs_funcs', 'bar_list_finalfn'
LANGUAGE 'C';

-- Aggregate definition tying the two C functions together.  It is created
-- under the name bar_list_agg, not bar_list.
CREATE AGGREGATE bar_list_agg (
    BASETYPE  = text,             -- item type
    SFUNC     = bar_list_transfn, -- transition function
    STYPE     = internal,         -- state type
    FINALFUNC = bar_list_finalfn  -- final calculation function
);

If all you want to do is to create a wrapper around array_agg() you can do that completely without C.

The answer to "How to concatenate strings of a string field in a PostgreSQL 'group by' query?" explains how you can create your own aggregate in pure SQL (without writing a single line of C code).

I don't know whether that qualifies for your "high performance" though.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM