繁体   English   中英

SQL自连接成对

[英]SQL self join pairwise

假设我有一个由像这样的条目组成的表

ID    Arrival Date    Arrival City    Departure Date    Departure City
1     Jun 27 2015     Berlin          Jun 20 2015       Paris
1     Jul 1 2015      Rome            Jun 29 2015       Berlin
1     Jul 30 2015     Vienna          Jul 15 2015       Rome
2     Jun 28 2015     Prague          Jun 23 2015       Vienna
2     Jul 1 2015      Rome            Jun 29 2015       Prague
2     Jul 30 2015     Vienna          Jul 15 2015       Moscow
...

对于每个ID,我想将这些数据与自身连接,使 Departure Date 与 Arrival Date 前后相继的观测值成对分组——即把每次出发与同一ID的上一次到达配对。

在上面的示例中(为方便起见,观察结果排序),第2行将附加到第1行,第3行到第2行,第5行到第4行以及第6行到第5行(因此产生4行,其中包含字段ID Arrival Date Arrival City Departure Date Departure City Arrival Date2 Arrival City2 Departure Date2 Departure City2 )。

每个ID可能有三次以上的出发,因此需要通用的方法。另请注意,数据中可能存在 Arrival City 与 Departure City 不匹配的缺口——例如,第5行的 Arrival City 不是第6行的 Departure City,但它们仍应合并。实际上,一个主要目标就是更好地了解数据中有多少这样的缺口。

一种解决方案是使用CTE,并利用两个连续行的行号(rownum)之差始终为1这一点(同时还要比较日期):

-- Pair every trip with the trip that directly follows it for the same ID.
-- Rows are numbered once across the whole table in (ID, arrivalDate) order, so
-- the next trip of the same ID always carries the next row number.
;WITH CTE AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY src.ID, src.arrivalDate) AS rownum,
        src.ID,
        src.arrivalDate,
        src.arrivalCity,
        src.departureDate,
        src.departureCity
    FROM #test AS src
)
SELECT *
FROM CTE AS c1
INNER JOIN CTE AS c2
    ON c2.ID = c1.ID
    AND c2.rownum = c1.rownum + 1           -- c2 is the row right after c1
    AND c1.arrivalDate < c2.departureDate   -- keep the pair only if the next departure is later than this arrival
GO

-- Layout of the #test temporary table that the pairing query reads from.
CREATE TABLE #test
(
    ID            INT,
    arrivalDate   DATE,
    arrivalCity   VARCHAR(30),
    departureDate DATE,
    departureCity VARCHAR(30)
);

SQL小提琴: SQLFiddle

试试这个:

-- Self-join that puts a trip (alias a) next to the trip that precedes it
-- (alias b, exposed with the "_2" suffix) for the same id.
-- For each b, a is the row whose departure_date equals the earliest
-- departure_date of that id that is strictly later than b.arrival_date.
SELECT a.id
    ,a.arrival_date
    ,a.arrival_city
    ,a.departure_date
    ,a.departure_city
    ,b.arrival_date AS arrival_date_2
    ,b.arrival_city AS arrival_city_2
    ,b.departure_date AS departure_date_2
    ,b.departure_city AS departure_city_2
FROM triptable a
JOIN triptable b ON a.id = b.id
    -- The lookup must read the same table as the outer query (triptable);
    -- pointing it at any other table name makes the statement fail or
    -- compare against unrelated data.
    AND a.departure_date = (
        SELECT MIN(x.departure_date)
        FROM triptable x
        WHERE x.departure_date > b.arrival_date
            AND x.id = b.id
    )

根据您的评论进行了修改。该查询:

  • 找到出发日期晚于上一条记录到达日期、且出发日期最早的那条记录,并且
  • 忽略样本数据中第6条记录的出发城市与第5条记录的到达城市不同这一事实。

不完全确定你想要什么样的结果……但我想试一试,看看下面这些内容是否对你有帮助。

-- Builds the sample data in #t1, then copies it to #t2 with a PairID column
-- that flags whether the next departure of the same id leaves from the city
-- this trip arrived in.

-- Remove leftovers from an earlier run. The checks keep the first run from
-- failing on a missing table and a rerun from failing because #t2 still exists.
if object_id('tempdb..#t1') is not null drop table #t1;
if object_id('tempdb..#t2') is not null drop table #t2;

create table #t1 (id int, ArrivalDate datetime, ArrivalCity varchar(50), Departuredate datetime, DepartureCity varchar(50));

insert into #t1 (id, ArrivalDate, ArrivalCity, Departuredate, DepartureCity)
values (1, 'Jun 27 2015', 'Berlin', 'Jun 20 2015','Paris'),
       (1, 'Jul 1 2015', 'Rome','Jun 29 2015','Berlin'),
       (1, 'Jul 30 2015', 'Vienna','Jul 15 2015','Rome'),
       (2, 'Jun 28 2015','Prague','Jun 23 2015','Vienna'),
       (2, 'Jul 1 2015','Rome','Jun 29 2015','Prague'),
       (2, 'Jul 30 2015','Vienna','Jul 15 2015','Moscow');

-- PairID is 1 when the next row of the same id (by Arrivaldate) departs from
-- this row's ArrivalCity, or when there is no next row; otherwise it is 0.
select *,
       case
           when lead(departurecity) over (partition by id order by Arrivaldate) = ArrivalCity
             or lead(departurecity) over (partition by id order by Arrivaldate) is null
           then 1
           else 0
       end as PairID
into #t2
from #t1;

-- Replace the 1 marker with the row's own id; rows flagged 0 stay 0.
update #t2
set PairID = id
where pairid != id
and pairid != 0;

以上是用于准备数据的代码……

-- Show every column of every row in #t2.
SELECT t2.*
FROM #t2 AS t2

将导致:

id  ArrivalDate ArrivalCity Departuredate   DepartureCity   PairID
1   2015-06-27  Berlin      2015-06-20      Paris           1
1   2015-07-01  Rome        2015-06-29      Berlin          1
1   2015-07-30  Vienna      2015-07-15      Rome            1
2   2015-06-28  Prague      2015-06-23      Vienna          2
2   2015-07-01  Rome        2015-06-29      Prague          0
2   2015-07-30  Vienna      2015-07-15      Moscow          2

凡是 PairID = 0 的地方……就存在缺口/坏数据(随你怎么称呼)……

你也可以:

-- Show every #t2 row together with the departure city and date of the row
-- that precedes it (by ArrivalDate) within the same ID. LAG is used because
-- the columns are named "Previous..."; the first row of each ID has no
-- predecessor and gets NULL in both columns.
select *,
       lag(departurecity) over (partition by ID order by ArrivalDate) as PreviousDepartureCity,
       lag(Departuredate) over (partition by ID order by ArrivalDate) as PreviousDepartureDate
from #t2

这会添加上一条记录的出发城市和出发日期……空值可以随你处理……它们表示第一次航班……或者,如果随后的 PairID = 0,则表示存在缺口……

可选的做法几乎无穷无尽……如果为 NULL 且 lag(PairID) = 0,那么这一行存在缺口……如果为 NULL 且 PairID = id,并且 lag(PairID) = id,那么这是第一次航班……

我的意思是我可以继续......然后给你更多的细节,但我不确定这是你在想什么..希望它无论如何都有帮助..

祝好运!

PS:我看不出为什么需要让表与自身连接……也许我完全没抓住重点……如果是这样的话……

听起来你是想对结果做透视(pivot),把各行结果放到额外的列中。我使用 ROW_NUMBER() 来排序:在透视之前先把各列拼接成一列,然后进行透视,最后用一个函数把拼接的内容重新拆分开。

-- Turns each ID's trips into columns: every trip is packed into one string,
-- the strings are pivoted into columns [1]..[5] by trip number, and the
-- fields of trip [1] are unpacked again with dbo.SplitString.
SELECT
    p.ID,
    -- Field positions 1-4 follow the order in which TripDetails is concatenated below.
    dbo.SplitString(p.[1], CHAR(13), 1) AS arrivalDate1,
    dbo.SplitString(p.[1], CHAR(13), 2) AS arrivalCity1,
    dbo.SplitString(p.[1], CHAR(13), 3) AS departureDate1,
    dbo.SplitString(p.[1], CHAR(13), 4) AS departureCity1,
    -- Also returns ID again plus the still-packed [1]..[5] columns.
    *
FROM
    (
        SELECT *
        FROM
        (
            SELECT
                ID,
                -- Number each ID's trips in arrivalDate order; this becomes the pivot column.
                ROW_NUMBER() OVER (PARTITION BY ID ORDER BY arrivalDate) RowNum,
                -- Pack the four trip fields into one string separated by CHAR(13).
                CAST(arrivalDate AS VARCHAR(MAX)) + CHAR(13) 
                + arrivalCity + CHAR(13)
                + CAST(departureDate AS VARCHAR(MAX)) + CHAR(13)
                + departureCity TripDetails
            FROM trip t
        ) t
        -- One output row per ID; column [n] holds the packed string of that ID's n-th trip.
        -- Only the row numbers listed in IN (...) become columns.
        PIVOT (MIN(t.TripDetails) FOR t.RowNum IN ([1], [2], [3], [4], [5] /* , ... */)) p
    ) p;

使用这个SplitString函数

-- Returns the @occurence-th (1-based) piece of @stringToSplit when it is cut
-- at every occurrence of @delim.
--   @stringToSplit  text to split
--   @delim          delimiter; may be longer than one character
--   @occurence      1-based index of the piece to return
-- Returns NULL when @occurence is outside the range of available pieces.
-- When @delim does not occur (or is empty), the whole string is piece 1.
CREATE FUNCTION dbo.SplitString ( 
    @stringToSplit VARCHAR(MAX),
    @delim VARCHAR(255),
    @occurence INT )
RETURNS VARCHAR(MAX) AS
BEGIN
    DECLARE @delimLen INT;
    DECLARE @pos BIGINT;
    DECLARE @orderNum INT;

    -- DATALENGTH rather than LEN: LEN ignores trailing spaces, so a delimiter
    -- made of (or ending in) spaces would be measured too short.
    SET @delimLen = DATALENGTH(@delim);
    SET @orderNum = 1;
    SET @pos = CHARINDEX(@delim, @stringToSplit);

    WHILE @pos > 0
    BEGIN
        IF @orderNum = @occurence
        BEGIN
            -- The piece is returned at full VARCHAR(MAX) width; it is not
            -- routed through a shorter intermediate variable.
            RETURN SUBSTRING(@stringToSplit, 1, @pos - 1);
        END

        -- Skip the whole delimiter, not just its first character. The length
        -- argument only needs to reach the end of the string; DATALENGTH
        -- keeps trailing spaces of the remainder.
        SET @stringToSplit = SUBSTRING(@stringToSplit, @pos + @delimLen, DATALENGTH(@stringToSplit));
        SET @orderNum = @orderNum + 1;
        SET @pos = CHARINDEX(@delim, @stringToSplit);
    END

    -- What is left after the last delimiter is the final piece.
    IF @orderNum = @occurence
    BEGIN
        RETURN @stringToSplit;
    END

    RETURN NULL;
END

这样应该可行:

-- Pair each row with the next row of the same id.
-- [table] and [date] are placeholder names for the trip table and the column
-- that orders its rows; they are bracket-quoted because TABLE is a reserved
-- word and the statement would not parse otherwise.
with cte as (
    select *, row_number() over (partition by id order by [date]) as rn
    from [table]
)
select *
from cte c1
join cte c2 on c1.id = c2.id and c1.rn = c2.rn - 1

试试这个,

-- Sample data in a table variable, then each trip (alias a) side by side with
-- the next trip of the same ID (alias b).
declare @t table(ID int,ArrivalDate datetime, ArrivalCity varchar(50)
,DepartureDate datetime,DepartureCity varchar(50))
insert into  @t (ID, ArrivalDate, ArrivalCity, DepartureDate, DepartureCity) values
(1,     'Jun 27 2015',     'Berlin',          'Jun 20 2015',       'Paris  ')
,(1,     'Jul 1 2015 ',     'Rome  ',          'Jun 29 2015',       'Berlin ')
,(1,     'Jul 30 2015',     'Vienna',          'Jul 15 2015',       'Rome    ')
,(2,     'Jun 28 2015',     'Prague',          'Jun 23 2015',       'Vienna ')
,(2,     'Jul 1 2015 ',     'Rome  ',          'Jun 29 2015',       'Prague ')
,(2  ,   'Jul 30 2015',     'Vienna',          'Jul 15 2015',       'Moscow ')

-- Number the trips separately for every ID, in arrival order.
;WITH CTE
AS (
    SELECT *
        ,ROW_NUMBER() OVER (
            PARTITION BY id
            ORDER BY arrivaldate
            ) rn
    FROM @t
    )
SELECT A.arrivaldate
    ,a.arrivalcity
    ,a.DepartureDate
    ,a.DepartureCity
    ,b.arrivaldate
    ,b.arrivalcity
    ,b.DepartureDate
    ,b.DepartureCity
FROM CTE A
-- The ID match keeps the last trip of one ID from being paired with the first
-- trip of the next ID. LEFT JOIN keeps trips that have no following trip;
-- their b columns are NULL.
LEFT JOIN CTE b ON a.id = b.id
    AND a.rn + 1 = b.rn

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM