Cleveland DBA

SQL Server administration in Cleveland, OH

Stuff I learned today – hierarchyid

I’m in training this week. Since I was restricted to a local class, I decided to take one focused on development, rather than dba-related topics, and get some exposure to facets of SQL Server I don’t normally deal with on a day-to-day basis. And I figured I’d share some of my newly garnered knowledge with you.

Today’s nugget of new knowledge: the hierarchyID data type. This is a new data type in SQL 2008 that addresses the challenges of storing parent/child hierarchical relationships inside a table. Previously, developers had to use CTEs and self-joins to handle this kind of scenario. A classic example is the Employee table, where Employees have Managers who are also Employees, seen below.

USE AdventureWorks;
GO

SELECT EmployeeID, LastName, FirstName, ManagerID
INTO EmployeeDemo
FROM HumanResources.Employee
JOIN Person.Contact ON Employee.ContactID = Contact.ContactID;
GO

SELECT
     Mgr.EmployeeID AS ManagerID, Mgr.FirstName + ' ' + Mgr.LastName AS ManagerName,
     Emp.EmployeeID AS EmployeeID, Emp.FirstName + ' ' + Emp.LastName AS EmployeeName
FROM EmployeeDemo AS Emp
LEFT JOIN EmployeeDemo AS Mgr
ON Emp.ManagerID = Mgr.EmployeeID
ORDER BY ManagerID, EmployeeID;

Another way to look at it is via a self-joining CTE. This will allow you to get the hierarchical level of the employee.

 WITH Organization(EmployeeId, LastName, Manager, HierarchyOrder)
 AS
 (
    SELECT emp.EmployeeId, emp.LastName, emp.LastName, 1 AS HierarchyOrder
    FROM EmployeeDemo AS emp
      WHERE emp.ManagerId is Null
    UNION ALL
    SELECT emp.EmployeeId, emp.LastName, Parent.LastName, HierarchyOrder + 1
    FROM EmployeeDemo AS emp
           INNER JOIN Organization AS Parent
                 ON emp.ManagerId = parent.EmployeeId
 )
 SELECT *
 From Organization
GO

That’s great and all, and pretty slick looking, but you could start to see some performance hit with a self-referencing view like that.
So let’s say I want to take my existing EmployeeDemo table and use it to create a hierarchical table, how would I do that? I’d start by creating a new table that contains a hierarchyid column.

CREATE TABLE OrgChart
(
  OrgNode hierarchyid,
  EmployeeID int,
  LastName varchar(50),
  FirstName varchar(50),
  ManagerID int
);
GO

I can then use a slightly modified version of my previous CTE to populate it.

WITH Organization(path, EmployeeID)
AS (
-- This section provides the value for the root of the hierarchy
SELECT hierarchyid::GetRoot() AS OrgNode, EmployeeID
FROM EmployeeDemo AS C
WHERE ManagerID IS NULL
UNION ALL
-- This section provides values for all nodes except the root
SELECT
CAST(p.path.ToString() + CAST(ROW_NUMBER() OVER (PARTITION BY ManagerID ORDER BY ManagerID) AS varchar(30)) + '/' AS hierarchyid),
C.EmployeeID
FROM EmployeeDemo AS C
JOIN Organization AS p
   ON C.ManagerID = P.EmployeeID
)
INSERT INTO OrgChart
SELECT path, o.EmployeeID, LastName, FirstName, ManagerID
FROM Organization o JOIN EmployeeDemo e ON o.EmployeeID = e.EmployeeID

SELECT * FROM OrgChart;
GO

So now what do I do with this new column? Well, having this new column makes the CTE unnecessary. You can traverse the hierarchy branches using built-in functions. Suppose I want to see who works under Peter Krebs, employee 21.

DECLARE @Manager hierarchyid

SELECT @Manager = OrgNode
FROM OrgChart
WHERE EmployeeID = 21 ;

SELECT *
FROM OrgChart
WHERE OrgNode.IsDescendantOf(@Manager) = 1
order by OrgNode;

With that query we can see that Cynthia Randall reports to Peter, and Andy Ruth reports to Cynthia, etc.

I’m definitely going to have to play around with this new datatype and all of the associated functions to get my brain wrapped around it better, but it looks pretty cool.

Some resources for further reading on the hierarchyid data type:
Populating a Table with Existing Hierarchical Data

Creating and Managing Data in a Hierarchical Table

Using hierarchyid Data Types (Database Engine)

June 6, 2011 Posted by | Features, T-SQL | , , | Leave a comment

Maintenance 3.0

I just couldn’t let it go.  Right after my last post I decided I just wasn’t satisfied with that maintenance script.  First of all, it only worked on one database at a time.  And secondly, there were just too many selects within the cursor loop.  And, personally, I’d rather work with a temp table than a physical table.

So I made a few adjustments, and here’s the new and improved (!)… Maintenance 3.0.

SET NOCOUNT ON;
DECLARE @dbid int;
DECLARE @dbname varchar(250);
DECLARE @objectid int;
DECLARE @indexid int;
DECLARE @partitioncount bigint;
DECLARE @schemaname sysname;
DECLARE @objectname sysname;
DECLARE @indexname sysname;
DECLARE @partitionnum bigint;
DECLARE @partitions bigint;
DECLARE @frag float;
DECLARE @fillfactor int;
DECLARE @command varchar(8000);

CREATE TABLE [dbo].[#work_to_do](
[dbid] [int] NULL,
[dbname] [varchar](250),
[schemaname] [varchar](250),
[objectid] [int] NULL,
[objectname] [varchar](250),
[indexid] [int] NULL,
[indexname] [varchar](250),
[partitionnum] [int] NULL,
[frag] [float] NULL,
[fillfactor] [int]
)

exec sp_msforeachdb 'USE [?];
INSERT INTO #work_to_do
SELECT
db_id() as dbid,
''?'',
sch.name,
s.object_id AS objectid,
o.name,
s.index_id AS indexid,
i.name,
s.partition_number AS partitionnum,
s.avg_fragmentation_in_percent AS frag,
i.fill_factor
FROM sys.dm_db_index_physical_stats (DB_ID(), NULL, NULL , NULL, ''LIMITED'') s
join sys.objects o (NOLOCK) on o.object_id = s.object_id
join sys.indexes i (NOLOCK) on i.object_id = s.object_id and i.index_id = s.index_id
JOIN sys.schemas sch (NOLOCK) ON sch.schema_id = o.schema_id
WHERE avg_fragmentation_in_percent > 5.0 AND s.index_id > 0
and fragment_count > 1000;'

PRINT '************FINISHED LOADING TABLE, REBUILDING INDEXES**************';

DECLARE partitions CURSOR FOR
SELECT * FROM #work_to_do where [dbname] not in ('master', 'tempdb', 'model', 'msdb', 'distribution');

-- Open the cursor.
OPEN partitions;

-- Loop through the partitions.
FETCH NEXT
FROM partitions
INTO @dbid, @dbname, @schemaname, @objectid, @objectname, @indexid, @indexname, @partitionnum, @frag,
@fillfactor;

WHILE @@FETCH_STATUS = 0
BEGIN;

SELECT @partitioncount = count (*)
FROM sys.partitions (NOLOCK)
WHERE object_id = @objectid AND index_id = @indexid;

IF @fillfactor = 0
SET @fillfactor = 80
ELSE
SET @fillfactor = @fillfactor - 5

-- 30 is an arbitrary decision point at which to switch between reorganizing and rebuilding
IF @frag < 30.0
BEGIN;

SELECT @command = 'USE ['+@dbname+']; ALTER INDEX ' + @indexname + ' ON ' + @schemaname + '.' + @objectname + ' REORGANIZE';
IF @partitioncount > 1
SELECT @command = @command + ' PARTITION=' + CONVERT (CHAR, @partitionnum);
EXEC (@command);
PRINT 'Executed ' + @command;

SELECT @command = 'USE ['+@dbname+']; UPDATE STATISTICS ' + @schemaname + '.' + @objectname + ' (' +@indexname + ') WITH SAMPLE 30 PERCENT'
EXEC (@command);
PRINT 'Executed ' + @command;

END;

IF @frag >= 30.0
BEGIN;

SELECT @command = 'USE ['+@dbname+']; ALTER INDEX ' + @indexname +' ON ' + @schemaname + '.' + @objectname + ' REBUILD WITH (PAD_INDEX = ON,  FILLFACTOR = '+ cast(@fillfactor as varchar) +') ';
IF @partitioncount > 1
SELECT @command = @command + ' PARTITION=' + CONVERT (CHAR, @partitionnum);
EXEC (@command);
PRINT 'Executed ' + @command;

END;

FETCH NEXT
FROM partitions
INTO @dbid, @dbname, @schemaname, @objectid, @objectname, @indexid, @indexname, @partitionnum, @frag,
@fillfactor;

END;
-- Close and deallocate the cursor.
CLOSE partitions;
DEALLOCATE partitions;

TRUNCATE TABLE #work_to_do

PRINT '************FINISHED REINDEXING, UPDATING STATS ON REMAINING TABLES**************';

exec sp_MSforeachdb 'USE [?];
INSERT INTO #work_to_do
select distinct db_id(), db_name(), sch.name, t.object_id, t.name, s.index_id, i.name, NULL, NULL, NULL
from
sys.tables t
join sys.dm_db_index_usage_stats s
on s.object_id = t.object_id
and s.database_id = DB_ID()
and user_updates > 0
and STATS_DATE(s.object_id, s.index_id) < s.last_user_update
join sys.indexes i
on t.object_id = i.object_id
and i.index_id = s.index_id
and s.index_id > 0
JOIN sys.schemas as sch ON sch.schema_id = t.schema_id;'
;

DECLARE stats CURSOR FOR
SELECT * FROM #work_to_do where [dbname] not in ('master', 'tempdb', 'model', 'msdb', 'distribution')

-- Open the cursor.
OPEN stats;

-- Loop through the partitions.
FETCH NEXT
FROM stats
INTO @dbid, @dbname, @schemaname, @objectid, @objectname, @indexid, @indexname, @partitionnum, @frag,
@fillfactor;

WHILE @@FETCH_STATUS = 0
BEGIN

SELECT @command = 'USE ['+@dbname+']; UPDATE STATISTICS ' + @schemaname + '.' + @objectname + ' (' +@indexname + ') WITH SAMPLE 30 PERCENT'
EXEC (@command);
PRINT 'Executed ' + @command;
FETCH NEXT
FROM stats
INTO @dbid, @dbname, @schemaname, @objectid, @objectname, @indexid, @indexname, @partitionnum, @frag,
@fillfactor;

END;
CLOSE stats;
DEALLOCATE stats;

drop table #work_to_do

June 6, 2011 Posted by | Maintenance | | Leave a comment