关于sql:PostgreSQL中的分组LIMIT:显示每组的前N行?

Grouped LIMIT in PostgreSQL: show the first N rows for each group?

我需要为每个组取前N行,按自定义列排序。

鉴于下表:

1
2
3
4
5
6
7
8
9
10
11
12
db=# SELECT * FROM xxx;
 id | section_id | name
----+------------+------
  1 |          1 | A
  2 |          1 | B
  3 |          1 | C
  4 |          1 | D
  5 |          2 | E
  6 |          2 | F
  7 |          3 | G
  8 |          2 | H
(8 ROWS)

我需要每个section_id的前两行(按名称排序),即类似于的结果:

1
2
3
4
5
6
7
8
 id | section_id | name
----+------------+------
  1 |          1 | A
  2 |          1 | B
  5 |          2 | E
  6 |          2 | F
  7 |          3 | G
(5 ROWS)

我正在使用PostgreSQL 8.3.5。


新解决方案(PostgreSQL 8.4)

1
2
3
4
5
6
7
8
9
10
-- Top 2 rows per section_id, ordered by name, using a window function
-- (PostgreSQL 8.4+).
-- id is a tie-breaker: with ORDER BY name alone, rows of one section that share
-- a name may be numbered in either order, so the chosen rows could vary between
-- runs. Assumes id is unique per row, as in the sample data.
-- Only the table's own columns are returned; the helper row number stays
-- internal so the result has the (id, section_id, name) shape asked for.
SELECT
  ranked.id,
  ranked.section_id,
  ranked.name
FROM (
  SELECT
    t.id,
    t.section_id,
    t.name,
    ROW_NUMBER() OVER (PARTITION BY t.section_id ORDER BY t.name, t.id) AS row_in_section
  FROM
    xxx t) ranked
WHERE
  ranked.row_in_section <= 2
ORDER BY
  ranked.section_id,
  ranked.row_in_section;


从v9.3开始,你可以进行横向连接

1
2
3
4
5
6
7
8
-- Top 2 rows per section_id, ordered by name, using a lateral join
-- (PostgreSQL 9.3+). Reads from xxx, the table the other queries use.
-- The outer side is reduced to one row per section first, so the lateral
-- subquery runs once per section and no DISTINCT over the joined result is
-- needed to hide repeated rows.
SELECT
    sections.section_id,
    top_rows.id,
    top_rows.name
FROM (
    SELECT DISTINCT section_id
    FROM xxx
) AS sections
INNER JOIN LATERAL (
    SELECT
        candidate.id,
        candidate.name
    FROM xxx AS candidate
    WHERE candidate.section_id = sections.section_id
    -- id breaks ties between equal names so LIMIT keeps the same rows every time
    -- (assumes id is unique per row, as in the sample data).
    ORDER BY candidate.name, candidate.id
    LIMIT 2
) AS top_rows ON TRUE
ORDER BY
    sections.section_id,
    top_rows.name,
    top_rows.id;

它可能更快,但当然,您应该专门针对您的数据和用例测试性能。


这是另一个解决方案(PostgreSQL <= 8.3)。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
-- Top 2 rows per section_id, ordered by name, without window functions
-- (works on PostgreSQL <= 8.3).
-- A row is kept when at most 2 rows of its own section sort at or before it.
SELECT
  a.id,
  a.section_id,
  a.name
FROM
  xxx a
WHERE (
  SELECT
    COUNT(*)
  FROM
    xxx b
  WHERE
    b.section_id = a.section_id
  AND
    -- Row-wise comparison on (name, id) instead of name alone: with name only,
    -- rows that share a name all count each other, so e.g. three rows tied on
    -- the smallest name would each get a count of 3 and all be dropped.
    -- Assumes id is unique per row, as in the sample data.
    (b.name, b.id) <= (a.name, a.id)
) <= 2
ORDER BY
  a.section_id,
  a.name,
  a.id;

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
        -- ranking without WINDOW functions
-- EXPLAIN ANALYZE
-- Top 2 rows per section_id, ordered by name, with the rank computed by a
-- self-join instead of a window function.
-- position_in_section = number of rows in the same section that sort at or
-- before the row. The comparison is row-wise on (name, id): comparing name
-- alone gives every row of a tied group the same, inflated count, which can
-- push all of them past the cutoff. Assumes id is unique per row (it is also
-- the GROUP BY key).
WITH ranked AS (
    SELECT
        x1.id,
        COUNT(x2.id) AS position_in_section
    FROM xxx AS x1
    LEFT JOIN xxx AS x2
        ON x2.section_id = x1.section_id
        AND (x2.name, x2.id) <= (x1.name, x1.id)
    GROUP BY x1.id
)
SELECT
    this.id,
    this.section_id,
    this.name
FROM xxx AS this
INNER JOIN ranked
    ON ranked.id = this.id
WHERE ranked.position_in_section <= 2
ORDER BY
    this.section_id,
    ranked.position_in_section;

        -- The same without using a CTE
-- EXPLAIN ANALYZE
-- Top 2 rows per section_id, ordered by name, with the rank computed by a
-- self-join in a derived table (no WITH clause, which needs PostgreSQL 8.4+).
-- position_in_section = number of rows in the same section that sort at or
-- before the row. The comparison is row-wise on (name, id): comparing name
-- alone gives every row of a tied group the same, inflated count, which can
-- push all of them past the cutoff. Assumes id is unique per row (it is also
-- the GROUP BY key).
SELECT
    this.id,
    this.section_id,
    this.name
FROM xxx AS this
INNER JOIN (
    SELECT
        x1.id,
        COUNT(x2.id) AS position_in_section
    FROM xxx AS x1
    LEFT JOIN xxx AS x2
        ON x2.section_id = x1.section_id
        AND (x2.name, x2.id) <= (x1.name, x1.id)
    GROUP BY x1.id
) AS ranked
    ON ranked.id = this.id
WHERE ranked.position_in_section <= 2
ORDER BY
    this.section_id,
    ranked.position_in_section;


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
-- Top 2 rows per section_id, ordered by (name, id), without window functions.
-- Strategy: for every section compute one "boundary" row (mlast), then return
-- all rows of that section that sort at or before the boundary.
SELECT  x.*
FROM    (
        SELECT  section_id,
                -- mlast is a whole-row value of xxx (the subqueries select the
                -- alias xi itself, not individual columns).
                COALESCE
                (
                (
                -- Preferred boundary: the 2nd row of the section by (name, id).
                -- Yields NULL when the section has fewer than 2 rows.
                SELECT  xi
                FROM    xxx xi
                WHERE   xi.section_id = xo.section_id
                ORDER BY
                        name, id
                OFFSET 1 LIMIT 1
                ),
                (
                -- Fallback boundary: the last row of the section by (name, id),
                -- used when no 2nd row exists.
                SELECT  xi
                FROM    xxx xi
                WHERE   xi.section_id = xo.section_id
                ORDER BY
                        name DESC, id DESC
                LIMIT 1
                )
                ) AS mlast
        FROM    (
                -- One row per section, so the boundary is computed once per section.
                SELECT  DISTINCT section_id
                FROM    xxx
                ) xo
        ) xoo
JOIN    xxx x
ON      x.section_id = xoo.section_id
        -- Row-wise comparison: keep rows sorting at or before the boundary row;
        -- id acts as the tie-breaker when names are equal.
        AND (x.name, x.id) <= ((mlast).name, (mlast).id)