Monday, February 1, 2016

Netezza: create UDX concatenating row values into coma delimited string GROUP_CONCAT

Upload files to Netezza server and execute install.sh:    bash install.sh

Note: the same string aggeregation functionality can be achieved without installation of User-Defined function. Read my post: Netezza: concatenate rows into string with XMLAGG()


Installer shell script code:

#!/bin/bash

DB=$1if [[ -z $DB ]]; then
 DB=$NZ_DATABASEfi

if [[ -z $DB ]]; then
 print "Usage: install <database>"
 return 1
fi

export NZ_DATABASE="BIG_DATA_DATABASE"
export NZ_USER="ALEX.X.JOHNSON"
export NZ_PASSWORD="neverTell_123"

nzudxcompile GroupConcat.cpp \
 --unfenced \
 --sig   "group_concat(varchar(any))" \
 --state  "(varchar(4000))" \
 --return  "varchar(4000)" \
 --class  "GroupConcat"

nzudxcompile GroupConcatSep.cpp \
 --unfenced \
 --sig   "group_concat(varchar(any),varchar(1))" \
 --state  "(varchar(4000),varchar(1))" \
 --return  "varchar(4000)" \
 --class  "GroupConcatSep"

rm *.o_*




Create Netezza user-defined procedure: C++ program for GROUP_CONCAT:

// (c) 2013 IBM Corporation.// This source code is provided free of charge by IBM Corporation as a// convenience to its customers.  This source code is provided "AS-IS" with// no warranty whatsoever.  The customer accepts all risk in connection// with the use of this script, and IBM Corporation shall have no// liability whatsoever.

// Usage: select group_concat(string) from <table> group by columns;
 
 
#include "udxinc.h"#include <string.h>#include <list>
 
#define DATA_DELIM 0
#define OUT_DELIM ','

using namespace nz::udx;
 
class GroupConcat : public Uda
{
    public:
        static Uda* instantiate();        

    void initializeState()
    {
        setStateNull(0, true );             
    }
 
    void accumulate()
    {
        if( isArgNull(0) ) return;
 
        StringArg* a = stringArg(0);
        StringArg* s = stringState(0);
 
        if( isStateNull(0)) {
            setStateNull(0, false );
 
            int len = a->length;
            if (len > s->dec_length)
                len = s->dec_length;
            memcpy(s->data, a->data, len);
            s->length = len;
            return;
        }

        int curlen = s->length;
        if(curlen >= s->dec_length)
            return;

        s->data[curlen] = DATA_DELIM;
        curlen++;

        int len=a->length;
        if( len + curlen > s->dec_length )
            len = s->dec_length - curlen;

        memcpy( s->data + curlen, a->data, len );
        s->length = curlen + len;
    }
 
    void merge()
    {
        if( isArgNull(0) ) return;
 
        StringArg* a = stringArg(0);
        StringArg* s = stringState(0);
 
        if( isStateNull(0)) {
            setStateNull(0, false );
            memcpy(s->data, a->data, a->length);
            s->length = a->length;
            return;
       }
 
       int curlen = s->length;
       if(curlen >= s->dec_length)
           return;

       s->data[curlen] = DATA_DELIM;
       curlen++;

       int len = a->length;
       if( len + curlen > s->dec_length )
           len = s->dec_length - curlen;

       memcpy( s->data + curlen, a->data, len );
       s->length = curlen + len;
    }
 
    ReturnValue finalResult()
    {
        std::string *str;
        std::list<std::string> values;

        if( isArgNull(0)) NZ_UDX_RETURN_NULL();
 
        StringReturn *r = stringReturnInfo();
        StringArg *s = stringArg(0);

        if( r->size < s->length )
            throwUdxException( "GROUP_CONCAT(): Result size too large!" );

        r->size = s->length;

        int len=0;
        char *p = s->data;

        for( int i=0; i<s->length; i++ )
        {
            if(s->data[i] != DATA_DELIM) {
                len++;
            } else {
                str = new string(p, len);
                values.push_back(*str);
                p = s->data + i + 1;
                len = 0;
            }
        }

        str = new string(p,len);
        values.push_back(*str);

        values.sort();

        setReturnNull(false);

        len = 0;
        while(!values.empty())
        {
            if( len>0 )
                r->data[len++] = OUT_DELIM;

            std::string tmp = values.front();
            memcpy(r->data+len, tmp.c_str(), tmp.length());
            len += tmp.length();
            values.pop_front();
        }
            
        NZ_UDX_RETURN_STRING(r);
    }
};
 
Uda* GroupConcat::instantiate()
{
    return new GroupConcat;
}




Create Netezza user-defined procedure: C++ program for GROUP_CONCAT:


//   This source code is provided "AS-IS" with// no warranty whatsoever.  
//   Usage: select group_concat(string,char) from <table> group by columns;
 
 
#include "udxinc.h"#include <string.h>#include <list>
 
#define DATA_DELIM 0

using namespace nz::udx;
 
class GroupConcatSep : public Uda
{
    public:
        static Uda* instantiate();        

    void initializeState()
    {
        setStateNull(0, true );             
    }
 
    void accumulate()
    {
        if( isArgNull(0))
            return;

 
        StringArg* a = stringArg(0);
        StringArg* s = stringState(0);
 
        if( isStateNull(0)) {
            setStateNull(0, false );

            if( isArgNull(1)) {
                setStateNull(1, true );
            } else {
                StringArg* sep = stringArg(1);
                StringArg* sepState = stringState(1);
                if( sep->length > 0 ) {
                    setStateNull(1, false );
                    sepState->data[0] = sep->data[0];
                    sepState->length = 1;
                }
            }

 
            int len = a->length;
            if (len > s->dec_length)
                len = s->dec_length;
            memcpy(s->data, a->data, len);
            s->length = len;
            return;
        }

        int curlen = s->length;
        if(curlen >= s->dec_length)
            return;

        s->data[curlen] = DATA_DELIM;
        curlen++;

        int len=a->length;
        if( len + curlen > s->dec_length )
            len = s->dec_length - curlen;

        memcpy( s->data + curlen, a->data, len );
        s->length = curlen + len;
    }
 
    void merge()
    {
        if( isArgNull(0) ) return;
 
        StringArg* a = stringArg(0);
        StringArg* s = stringState(0);
 
        if( isStateNull(0)) {
            setStateNull(0, false );

            if( isArgNull(1)) {
                setStateNull(1, true );
            } else {
                setStateNull(1, false );
                StringArg* sep = stringArg(1);
                StringArg* sepState = stringState(1);
                sepState->data[0] = sep->data[0];
                sepState->length = 1;
            }
             
            memcpy(s->data, a->data, a->length);
            s->length = a->length;

            return;
       }
 
       int curlen = s->length;
       if(curlen >= s->dec_length)
           return;

       s->data[curlen] = DATA_DELIM;
       curlen++;

       int len = a->length;
       if( len + curlen > s->dec_length )
           len = s->dec_length - curlen;

       memcpy( s->data + curlen, a->data, len );
       s->length = curlen + len;
    }
 
    ReturnValue finalResult()
    {
        std::string *str;
        std::list<std::string> values;

        if( isArgNull(0)) NZ_UDX_RETURN_NULL();
 
        StringReturn *r = stringReturnInfo();
        StringArg *s = stringArg(0);
        StringArg *sep = stringArg(1);

        if( r->size < s->length )
            throwUdxException( "GROUP_CONCAT(): Result size too large!" );

        r->size = s->length;

        int len=0;
        char *p = s->data;

        for( int i=0; i<s->length; i++ )
        {
            if(s->data[i] != DATA_DELIM) {
                len++;
            } else {
                str = new string(p, len);
                values.push_back(*str);
                p = s->data + i + 1;
                len = 0;
            }
        }

        str = new string(p,len);
        values.push_back(*str);

        values.sort();

        setReturnNull(false);

        len = 0;
        while(!values.empty())
        {
            if( len>0 && !isArgNull(1))
                r->data[len++] = sep->data[0];

            std::string tmp = values.front();
            memcpy(r->data+len, tmp.c_str(), tmp.length());
            len += tmp.length();
            values.pop_front();
        }
            
        NZ_UDX_RETURN_STRING(r);
    }
};
 
Uda* GroupConcatSep::instantiate()
{
    return new GroupConcatSep;
}





No comments:

Post a Comment