如何创建可以从 DBeaver 调用的 PostgreSQL 函数？

Question

这是示例数据：

-- Sample login data: one row per (username, login timestamp).
-- NOTE: the '#' prefix is kept exactly as posted; in PostgreSQL it is not valid
-- in an unquoted identifier (the name would have to be written "#logins").
CREATE TABLE #logins (
    username text not null,
    logged_at timestamp not null);
insert into #logins (username, logged_at) values
    ('a','2019-01-01'),('b','2019-01-01'),('c','2019-01-01'),('d','2019-01-01'),('e','2019-01-01'),
    ('a','2019-02-01'),('b','2019-02-01'),('c','2019-02-01'),('f','2019-02-01'),('g','2019-02-01'),
    ('h','2019-02-01'),('i','2019-02-01'),('j','2019-02-01'),('a','2019-03-01'),('b','2019-03-01'),
    ('f','2019-03-01'),('h','2019-03-01'),('g','2019-03-01'),('k','2019-03-01'),('l','2019-03-01'),
    ('m','2019-03-01'),('n','2019-03-01'),('o','2019-03-01'),('a','2019-04-01'),('f','2019-04-01'),
    ('g','2019-04-01'),('k','2019-04-01'),('l','2019-04-01');

我通常做什么

-- Build a per-user work table #a holding each user's first login date, then add
-- one column per follow-up month (m_1..m_3). Each m_N column receives the
-- username when a login row exists at add_months(first login, N), else NULL.
drop table if exists #a;
create table #a as 
select username, min(logged_at) as date from #logins -- MIN() keeps each user's earliest login only
group by 1;


-- m_1: username if the user has a login exactly 1 month after the first login.
alter table #a 
add m_1 varchar;
update #a
set m_1 = (select username from #logins 
            where add_months(#a.date,1) = #logins.logged_at and #logins.username = #a.username);

-- m_2: same check with an offset of 2 months.
alter table #a 
add m_2 varchar;
update #a
set m_2 = (select username from #logins 
            where add_months(#a.date,2) = #logins.logged_at and #logins.username = #a.username);

-- m_3: NOTE(review): the offset used here is 1 month (same as m_1), not 3 —
-- confirm whether that is intended.
alter table #a 
add m_3 varchar;
update #a
set m_3 = (select username from #logins 
            where add_months(#a.date,1) = #logins.logged_at and #logins.username = #a.username);

-- Final report: one row per first-login date. count(col) counts non-NULL
-- values only, so each m_N is the number of users flagged for that month.
select to_date(date,'yyyy-mm') as date, count(username) as num_acc,
                                    count(m_1) as m_1,
                                    count(m_2) as m_2,
                                    count(m_3) as m_3
from #a
group by 1
order by 1

预期结果：

             num_acc   m_1     m_2     m_3
2019-01-01      5       3       2       3
2019-02-01      5       3       2       3
2019-03-01      5       2       0       2

在此基础上，我会把数据下载下来，并以同期群（cohort）的形式对其进行可视化。

关键是我想创建一个方便的函数。 顺便说明一下，我是在 DBeaver 中使用 PostgreSQL。 在这个函数中，我们只需要输入一个带有 ID 和日期的表，然后它就会自动执行这个过程。

到目前为止，这是我的尝试：

 -- Asker's draft (not valid as written). The intent is to repeat the
 -- "add a column, then fill it" step of the manual script inside a loop.
 -- NOTE(review): LOOP / EXIT WHEN / := and a declare section are PL/pgSQL
 -- constructs; they are not available in a LANGUAGE sql function body.
 CREATE OR REPLACE FUNCTION test(timestamp,varchar(255)) 
    RETURNS int 
declare
    counter integer :=1
stable 
AS $$

   LOOP 
      -- Leave the loop once counter reaches 6.
      EXIT WHEN counter = 6 ; 
      counter := counter + 1 ; 
      -- NOTE(review): this adds a column literally named "counter" on every
      -- pass, not a column named after the counter's current value.
      alter table #a
      add counter varchar;
     -- Intended to store the username when a matching login exists for the
     -- month offset given by counter; the statement is incomplete as posted.
     update #a
     counter = select user_name from #logins 
                where add_month(#logins.logged_at,counter) = #a.first_login 
                #a.first_login and #logins.username = #a.username
   END LOOP 

$$ LANGUAGE sql;

这很尴尬，因为 SQL 中的function相当困难。 这是我能做的最好的事情。

（p/s：请注意，不能使用 LANGUAGE plpythonu。我们唯一的选择是 sql。）

Answer 1

修订：纳入额外要求

有了新信息，可以进行一些小的调整。 由于“无论您在一个月内登录多少次，我们都只按用户名计为 1 次”， 我们将使用 Postgres 的 date_trunc 函数把每个登录日期归到当月 1 日再进行比较，而不是要求日期完全相等，这样实际登录日期是哪一天就无关紧要了。 继续使用 WHERE EXISTS 可确保无论用户有多少次登录，我们都只计算 1 次。因此修订后（REVISED）的函数如下：

create or replace function collect_user_login_counts(login_start_in date)
returns table (
    "Date"  text,
    num_acc bigint,
    m_1     bigint,
    m_2     bigint,
    m_3     bigint
)
language sql
strict
as $$
    -- The work table only ever holds the rows of the latest run, so empty it first.
    truncate user_login_wrk;

    with first_login as (
        -- One row per user: the first day of the earliest month in which the
        -- user logged in, ignoring months before the month of login_start_in.
        select
            lg.username,
            min(date_trunc('month', lg.logged_at))::date as logged_at
        from logins lg
        where date_trunc('month', lg.logged_at)::date >= date_trunc('month', login_start_in)::date
        group by lg.username
    ),
    stored as (
        -- Persist one row per user. m_N carries the username when the user has
        -- at least one login in the N-th month after the first month, otherwise
        -- NULL; EXISTS makes several logins in one month count only once.
        -- The inserted rows are handed on for the summary below.
        insert into user_login_wrk (username, logged_at, m_1, m_2, m_3)
        select
            fl.username,
            fl.logged_at,
            case
                when exists (
                    select null
                    from logins nx
                    where nx.username = fl.username
                      and date_trunc('month', nx.logged_at)::date = (fl.logged_at + interval '1 month')::date
                ) then fl.username
            end,
            case
                when exists (
                    select null
                    from logins nx
                    where nx.username = fl.username
                      and date_trunc('month', nx.logged_at)::date = (fl.logged_at + interval '2 month')::date
                ) then fl.username
            end,
            case
                when exists (
                    select null
                    from logins nx
                    where nx.username = fl.username
                      and date_trunc('month', nx.logged_at)::date = (fl.logged_at + interval '3 month')::date
                ) then fl.username
            end
        from first_login fl
        returning *
    )
    -- Summarise per first-login month: number of users, and how many of them
    -- came back 1, 2 and 3 months later (count() skips the NULL markers).
    select
        to_char(st.logged_at, 'yyyy-mm'),
        count(st.username),
        count(st.m_1),
        count(st.m_2),
        count(st.m_3)
    from stored st
    -- Same lower bound as in first_login, re-applied to the inserted rows.
    where st.logged_at >= date_trunc('month', login_start_in)::date
    group by to_char(st.logged_at, 'yyyy-mm')
    order by to_char(st.logged_at, 'yyyy-mm');
$$;

测试：为了测试，我更改了您的原始日期，因此没有实际具有当月 1 日的行，也没有在同一天编号的行。 此外，函数的参数日期不会出现在数据中。

-- Test data: same users and months as the question's sample, but with varied
-- days of the month. Any existing rows in logins are removed first.
truncate logins;
insert into logins (username, logged_at) values
        ('a','2019-01-03'),('b','2019-01-04'),('c','2019-01-11'),('d','2019-01-15'),('e','2019-01-21'),        
        ('a','2019-02-06'),('b','2019-02-02'),('c','2019-02-04'),('f','2019-02-08'),('g','2019-02-09'),
        ('h','2019-02-12'),('i','2019-02-24'),('j','2019-02-26'),('a','2019-03-02'),('b','2019-03-03'),
        ('f','2019-03-05'),('h','2019-03-11'),('g','2019-03-17'),('k','2019-03-31'),('l','2019-03-09'),
        ('m','2019-03-29'),('n','2019-03-27'),('o','2019-03-24'),('a','2019-04-06'),('f','2019-04-03'),   
        ('g','2019-04-14'),('k','2019-04-30'),('l','2019-04-11'); 
-- The start date 2019-01-18 does not occur in the data above.
select collect_user_login_counts(date '2019-01-18');        -- select as row
select * from collect_user_login_counts(date '2019-01-18'); -- select as individual columns

结果

Date    | num_acc| m_1| m_2| m_3
________________________________
2019-01 | 5      | 3  | 2  | 1
2019-02 | 5      | 3  | 2  | 0
2019-03 | 5      | 2  | 0  | 0

尽管数据发生了变化，但产生了相同的结果。 顺便提一句。 我确实用你的数据测试了原件。 除了 m_3 之外，这些结果完全符合您的期望，原始回复中对此进行了解释。 我只是没有发布它，我的错误。

**原回复**
好吧，您发布的代码存在一些问题。 正如 @a_horse_with_no_name 指出的，# 字符在 Postgres 对象名称中无效，除非名称用双引号括起来（即“#logins”），这与所在的模式（schema）无关。 此外，Postgres 没有 add_months 函数（您可能有一个用户自定义的同名函数，但我无从得知）。

我注意到与您的预期结果不一致的地方。 首先，您“通常所做”的最终查询无法产生这些结果。 查询返回的日期是年-月，而预期结果中是年-月-日。 我会假设是年-月。 其次，我相信 m_3 的预期输出是不正确的。 这是因为您在设置 m_3 时使用了 add_months(#a.date,1)。 从命名结构和前面的设置来看，我相信这是一个复制/粘贴造成的笔误，应该写作 add_months(#a.date,3)。 我会假设是后者。 然而，这确实会改变 m_3 列的结果。

您发布的函数中有一处我还没有完全理解。 我不确定那个神奇的数字 6 是做什么的。 您是否想创建 m_1 到 m_6 这几列？看起来是这样。 然而，代码实际上会 6 次尝试创建名为 counter 的列，这在第二次就会失败。 在下面的函数中，我将保留 m_1 到 m_3。 如果 m_6 是您的目标，只需按需要复制 m_1 的写法并加以修改即可（还必须同时更新表定义）。

做了一些改变：

我不命名列日期。 它是一个保留字，虽然您现在可以摆脱它，但随时可能改变。 所以我将在工作表中使用logged_at。
我不喜欢 DB 对象的单字符名称所以 #a 变成了 user_login_wrk。
我避免在函数中使用 DDL（create、alter）。 所以表是在函数外部创建的。 此外，对于 SQL 函数来说，表必须事先存在，除非整个函数体是作为单个字符串的动态 SQL。

考虑到所有这些，我们得到：

-- Work table for the monthly login counts: one row per user.
create table user_login_wrk (
    username  text,  -- user the row belongs to
    logged_at date,  -- the user's first login date within the reported period
    m_1       text,  -- username when the user logged in again 1 month later, else NULL
    m_2       text,  -- same marker for 2 months later
    m_3       text   -- same marker for 3 months later
);

现在是重头戏。

create or replace function collect_user_login_counts(login_start_in date)
returns table (
    "Date"  text,
    num_acc bigint,
    m_1     bigint,
    m_2     bigint,
    m_3     bigint
)
language sql
strict
as $$
    -- The work table only ever holds the rows of the latest run, so empty it first.
    truncate user_login_wrk;

    with first_login as (
        -- One row per user: the earliest login date that is not before login_start_in.
        select
            lg.username,
            min(lg.logged_at)::date as logged_at
        from logins lg
        where lg.logged_at::date >= login_start_in
        group by lg.username
    ),
    stored as (
        -- Persist one row per user. m_N carries the username only when a login
        -- row exists whose logged_at is exactly the first login date plus
        -- N months; otherwise it stays NULL. The inserted rows are handed on
        -- for the summary below.
        insert into user_login_wrk (username, logged_at, m_1, m_2, m_3)
        select
            fl.username,
            fl.logged_at,
            case
                when exists (
                    select null
                    from logins nx
                    where nx.username = fl.username
                      and nx.logged_at = fl.logged_at + interval '1 month'
                ) then fl.username
            end,
            case
                when exists (
                    select null
                    from logins nx
                    where nx.username = fl.username
                      and nx.logged_at = fl.logged_at + interval '2 month'
                ) then fl.username
            end,
            case
                when exists (
                    select null
                    from logins nx
                    where nx.username = fl.username
                      and nx.logged_at = fl.logged_at + interval '3 month'
                ) then fl.username
            end
        from first_login fl
        returning *
    )
    -- Summarise per first-login month: number of users, and how many of them
    -- were flagged for 1, 2 and 3 months later (count() skips the NULL markers).
    select
        to_char(st.logged_at, 'yyyy-mm'),
        count(st.username),
        count(st.m_1),
        count(st.m_2),
        count(st.m_3)
    from stored st
    -- Same lower bound as in first_login, re-applied to the inserted rows.
    where st.logged_at >= login_start_in
    group by to_char(st.logged_at, 'yyyy-mm')
    order by to_char(st.logged_at, 'yyyy-mm');
$$;
-- Test: run the function for two different start dates.
select collect_user_login_counts(date '2019-01-01');        -- select as row
select * from collect_user_login_counts(date '2019-01-01'); -- select as individual columns
select * from collect_user_login_counts(date '2019-02-01'); -- same report starting one month later

以上函数会完全清空工作表并重新构建它。 但是，有时需要查看上次运行的结果。 下面的函数提供了该功能。 （请注意，如果需要，可以把实际的查询提取出来单独运行。）

create or replace function show_user_login_counts()
returns table (
    "Date"  text,
    num_acc bigint,
    m_1     bigint,
    m_2     bigint,
    m_3     bigint
)
language sql
strict
as $$
    -- Summarise whatever the work table currently holds, without rebuilding it:
    -- one row per first-login month with the number of users and the number of
    -- non-NULL m_1 / m_2 / m_3 markers.
    select
        to_char(wrk.logged_at, 'yyyy-mm'),
        count(wrk.username),
        count(wrk.m_1),
        count(wrk.m_2),
        count(wrk.m_3)
    from user_login_wrk wrk
    group by to_char(wrk.logged_at, 'yyyy-mm')
    order by to_char(wrk.logged_at, 'yyyy-mm');
$$;
-- Test: read back the summary of the rows currently in the work table.
select show_user_login_counts();        -- select as row
select * from show_user_login_counts(); -- select as individual columns

有几个问题没有解决。 目前每个后续月份 (m_1,m_2,m_3) 都要求登录日期与起始日期恰好相隔整数个月。 如果用户登录的日期不是完全相同的那一天，而是第二天，会发生什么？ 另外，也没有考虑用户在一个月内多次登录的情况。 好吧，这些问题留待以后再说。

如何创建可以从 DBeaver 调用的 PostgreSQL 函数？

问题描述

1 个解决方案

解决方案1
2 已采纳 2019-12-18 03:18:02

如何创建可以从 DBeaver 调用的 PostgreSQL 函数？

问题描述

1 个解决方案

解决方案1 2 已采纳 2019-12-18 03:18:02

解决方案1
2 已采纳 2019-12-18 03:18:02