Issue
My SQL Server table looks like this:
id Distance a b Grp
--------------------------------
1 0.0000000000 100 114 NULL
2 0.1000000000 64 125 NULL
3 0.1000000000 88 100 NULL
4 0.1000000000 65 125 NULL
5 0.1000000000 63 64 NULL
6 0.1000000000 65 66 NULL
7 0.2000000000 63 66 NULL
8 0.2000000000 10 61 NULL
9 0.2000000000 19 61 NULL
10 0.2000000000 30 61 NULL
11 0.2000000000 10 65 NULL
12 0.2000000000 10 94 NULL
13 0.2000000000 19 65 NULL
14 0.2000000000 19 94 NULL
15 0.2000000000 30 94 NULL
16 0.2000000000 60 94 NULL
17 0.2000000000 61 94 NULL
The Grp
column should be filled as follows
first record
Grp
is 1if the next row's values of a & b are in any of the previous rows, then it will take the first rows
Grp
valueif the next row's values of a & b are not in any of the previous rows, then
Grp
value will bemax Grp + 1
if the record id = 3 then the value of b = 100 which exists in the previous rows, the first one it appears in was id = 1 which is
Grp = 1
so theGrp
will be 1 for id 3
This is what my table should look like:
id Distance a b Grp
--------------------------------
1 0.0000000000 100 114 1
2 0.1000000000 64 125 2
3 0.1000000000 88 100 1
4 0.1000000000 65 125 2
5 0.1000000000 63 64 2
6 0.1000000000 65 66 2
7 0.2000000000 63 66 2
8 0.2000000000 10 61 3
9 0.2000000000 19 61 3
10 0.2000000000 30 61 3
11 0.2000000000 10 65 2
12 0.2000000000 10 94 3
13 0.2000000000 19 65 2
14 0.2000000000 19 94 3
15 0.2000000000 30 94 3
16 0.2000000000 60 94 3
17 0.2000000000 61 94 3
I have built this script that works fine, but it's extremely slow, any way I can get it better (without a loop)?
DECLARE @T AS TABLE
(
id int IDENTITY,
Distance decimal(18, 10),
a int,
b int,
Grp int
)
INSERT INTO @T(Distance, a, b)
SELECT Distance, a, b
FROM MyTable
ORDER BY Distance
UPDATE @T
SET Grp = 1
WHERE id = 1
DECLARE @i int = 2, @max int, @min int,
@grp int, @a int, @b int, @maxgrp int = 1
SELECT @max = MAX(id) FROM @T
WHILE @i <= @max
BEGIN
SELECT @a = a, @b = b
FROM @T
WHERE id = @i
SELECT @min = MIN(id)
FROM @T
WHERE id < @i AND a IN (@a, @b) OR b IN (@a, @b)
SELECT @grp = grp
FROM @T
WHERE id = @min
IF @grp IS NULL
BEGIN
SET @maxgrp = @maxgrp + 1
SET @grp = @maxgrp
END
UPDATE @T
SET Grp = @grp
WHERE id = @i
SET @i = @i + 1
END
SELECT * FROM @T
Solution
An answer using recursion instead of a loop.
First, identify the first row any value (from a or b) appears on...
CREATE TABLE #node(
row_id INT,
old_val INT,
new_val INT,
link_count INT
INDEX node_old_new CLUSTERED (link_count, old_val, new_val, row_id),
);
INSERT INTO
#node
SELECT
e.id, twinned.*, COUNT(*) OVER (PARTITION BY e.id)
FROM
#example AS e
CROSS APPLY
(
SELECT e.a AS old_val, e.b AS new_val
UNION ALL
SELECT e.b AS old_val, e.a AS new_val
)
AS twinned
WHERE
NOT EXISTS (
SELECT *
FROM #example AS lookup
WHERE twinned.new_val IN (lookup.a, lookup.b)
AND lookup.id < e.id
)
;
Where link_count = 2
, both a
and b
appear for the first time on this row, which means this row will start a new group.
Where link_count = 1
, new_val
has never been seen previously, but old_val
has been. So, once old_val
has been assigned a group, we can propagate that group to new_val
.
That's just a tree traversal creating a closure table.
WITH
closure AS
(
SELECT
new_val AS val,
DENSE_RANK() OVER (ORDER BY row_id) AS grp,
row_id AS row_id,
0 AS depth
FROM
#node
WHERE
link_count = 2
UNION ALL
SELECT
r.new_val,
c.grp,
r.row_id,
c.depth + 1
FROM
closure AS c
INNER JOIN
#node AS r
ON r.old_val = c.val
AND r.link_count = 1
)
Now, for any value in a
or b
we can look up the group for that value in the closure table. We'll likely get two different groups, one from looking up a
and one from looking up b
; so, we take the group that was allocated from the earliest row.
SELECT
e.*, g.grp
FROM
#example e
CROSS APPLY
(
SELECT TOP 1
c.grp
FROM
#closure AS c
WHERE
c.val IN (e.a, e.b)
ORDER BY
c.row_id
)
AS g
ORDER BY
e.id
Answered By - MatBailie Answer Checked By - Mary Flores (PHPFixing Volunteer)
0 Comments:
Post a Comment
Note: Only a member of this blog may post a comment.