Monday, March 7, 2011

Import XML Files into SQL Server 2005 or 2008

Presented is a script that allows one to import all XML files in a specific directory into SQL Server 2005 or 2008 and store it as XML. Most of what is presented should work for importing the contents of any type of file, not just XML. The example uses OLE Automation Procedures to extract extra data that one may find of use storing along with the XML such as file creation time, size, etc.

The first step is to enable OLE Automation Procuedures in SQL Server:

USE [master];
GO

EXECUTE SP_CONFIGURE 'show advanced options', 1;
RECONFIGURE WITH OVERRIDE;
GO

EXECUTE SP_CONFIGURE 'Ole Automation Procedures', 1;
RECONFIGURE WITH OVERRIDE;
GO

EXECUTE SP_CONFIGURE 'show advanced options', 0;
RECONFIGURE WITH OVERRIDE;
GO

The next step is to create a table to hold the XML documents. This table will also allow one to version the XML documents if a document is imported with the same name as one that already exists:

IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID('[meta].[XMLSchema]') AND type in ('U'))
 DROP TABLE [meta].[XMLSchema];
GO

CREATE TABLE [meta].[XMLSchema](
  [XMLSchemaID] [int] IDENTITY (1, 1) NOT NULL
 ,[FileName] [VARCHAR](500) NOT NULL
 ,[FileVersion] AS CAST(Major AS varchar) + '.' + CAST(Minor AS varchar) + '.' + CAST(Maint AS varchar)
 ,[Major] int NOT NULL
 ,[Minor] int NOT NULL
 ,[Maint] int NOT NULL
 ,[FileType] [VARCHAR] (100) NOT NULL
 ,[FileSize] int NOT NULL
 ,[TimeCreated] [DATETIME] NOT NULL
 ,[TimeLastModified][DATETIME] NOT NULL 
 ,[XMLData] [xml] NOT NULL
 ,[EffDate] [datetime] NOT NULL
 ,[InEffDate] [datetime] NULL
 ,[CurrVersionFlag] [tinyint] NOT NULL
) ON [PRIMARY]
GO

-- Primary key
ALTER TABLE [meta].[XMLSchema]
 ADD CONSTRAINT [PK_XMLSchema] PRIMARY KEY CLUSTERED(
   [FileName] ASC
  ,[Major] ASC
  ,[Minor] ASC
  ,[Maint] ASC
 ) ON [PRIMARY];
GO

-- Defaults
ALTER TABLE [meta].[XMLSchema]
 ADD CONSTRAINT DF_XMLSchema_CurrVersionFlag
  DEFAULT 0 FOR CurrVersionFlag;
GO

Now create the stored procedure that will import the XML files. The header and comments should make it clear what is happening within the stored procedure:

IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID('[meta].[ImportSchemaFiles]') AND type in ('P'))
 DROP PROCEDURE [meta].[ImportSchemaFiles];
GO

-------------------------------------------------------------------------------
-- Date:   November 11, 2010
-- Author: Keith R. Davis
-- Desc:   Imports XSD schema files from the specified directory and
--         attaches them to the supplied version information. If an existing
--         schema file is found on import, it will be deprecated and the new
--         file will be set at the current version of that file to use.       
--
-- Param:  @SchemaDir - file system dir containing the files to import
--         @MajorVer  - major version of the files being imported
--         @MinorVer  - minor version of the files being imported
--         @MaintVer  - maintenance version of the files being imported
--
-- Return: Nothing
-------------------------------------------------------------------------------
 
CREATE PROCEDURE [meta].[ImportSchemaFiles](
 @SchemaDir varchar(1024)
 ,@MajorVer int
 ,@MinorVer int 
 ,@MaintVer int
)
AS

BEGIN
 -- Turn off row counts
 SET NOCOUNT ON;
 SET XACT_ABORT ON;

 -- Variables
 DECLARE @FileExist   int    -- File exists flag
 DECLARE @FileName   varchar(500) -- File name
 DECLARE @FullFileName  varchar(500) -- Full file name (path + filename)
 DECLARE @SQL    nvarchar(1000) -- Dynamic SQL variable
 DECLARE @xml    xml    -- Holds imported schema data
 DECLARE @ObjFile   int    -- File object
 DECLARE @ObjFileSystem  int    -- File system object
 DECLARE @DateCreated  datetime  -- Date file was created
 DECLARE @DateLastModified datetime  -- Date file was last written to
 DECLARE @Size    int    -- File size in bytes
 DECLARE @Type    varchar(100) -- Long Windows file type (eg.'Text Document',etc)
 DECLARE @LoopID    int    -- Loop counter
 DECLARE @MaxID    int    -- Max ID from list of files to import

 BEGIN TRY
  BEGIN TRANSACTION
   -- Check if the temp file list table exists and if so, drop it
   IF OBJECT_ID('tempdb..#tempList') IS NOT NULL
    DROP TABLE #tempList;
   
   -- Create a temp table to hold a list of files to import
   CREATE TABLE #tempList(
     [FileName] varchar(500)
    ,Depth tinyint
    ,[File] tinyint
   );
   
   -- Add the files to the import list
   INSERT INTO #tempList
    EXEC xp_dirtree @SchemaDir, 1, 1;
   
   -- This will be used to loop over the table
   ALTER TABLE #tempList ADD id int IDENTITY;

   -- Set the loop counters
   SELECT @LoopID = MIN(id),@MaxID = MAX(ID)
   FROM #tempList

   -- Create a file system object and remember the "handle"
   EXEC dbo.sp_OACreate 'Scripting.FileSystemObject', @ObjFileSystem OUT


   -- Start importing the files   
   WHILE @LoopID <= @MaxID
   BEGIN
    -- Get the current file name
    SELECT @FileName = filename
    FROM #tempList
    WHERE id = @LoopID
    
    -- Set the full file name
    SELECT @FullFileName = @SchemaDir + '\' + @FileName
       
    -- Sanity check in case someone removed the file
    EXEC xp_fileexist @FullFileName , @FileExist OUTPUT
    IF @FileExist =1 
    BEGIN
     -- Import the current file
     SELECT @SQL = N'SELECT @xml = xml 
      FROM OPENROWSET(BULK ''' + @FullFileName + ''', Single_BLOB) AS TEMP(xml)'
     EXEC SP_EXECUTESQL @SQL, N'@xml xml OUTPUT', @xml OUTPUT
         
     -- If a file with the same name as the current file already exists, deprecate it
     -- by setting the ineffective date on that file.
     IF EXISTS (SELECT * FROM meta.XMLSchema WHERE [filename] = @FileName)
      UPDATE meta.XMLSchema SET InEffDate = GETDATE(), CurrVersionFlag = 0
      WHERE
       [filename] = @FileName
       AND EffDate = (SELECT MAX(EffDate) FROM meta.XMLSchema WHERE [filename] = @FileName)  

     -- Create an object for the path/file and remember the "handle"
     EXEC dbo.sp_OAMethod @ObjFileSystem, 'GetFile', @ObjFile OUT, @FullFileName

     -- Get the all the required attributes for the file itself
     EXEC dbo.sp_OAGetProperty @ObjFile, 'DateCreated',      @DateCreated      OUT
     EXEC dbo.sp_OAGetProperty @ObjFile, 'DateLastModified', @DateLastModified OUT
     EXEC dbo.sp_OAGetProperty @ObjFile, 'Size',             @Size             OUT
     EXEC dbo.sp_OAGetProperty @ObjFile, 'Type',             @Type             OUT

     -- Insert the current schema file, we use the output value (@xml) from above
     INSERT meta.XMLSchema (
       [FileName]
      ,[Major]
      ,[Minor]
      ,[Maint]
      ,[FileType]
      ,[FileSize]
      ,[timecreated]
      ,[timelastmodified]
      ,[xmldata]
      ,[effdate]
      ,[ineffdate]
      ,[CurrVersionFlag])
     SELECT
       @FileName
      ,@MajorVer
      ,@MinorVer
      ,@MaintVer
      ,@Type
      ,@Size
      ,@DateLastModified
      ,@DateCreated
      ,@xml
      ,GETDATE()
      ,NULL
      ,1
    END
    
    -- Get the next id, instead of +1 we grab the next value in case of skipped id values
    SELECT @LoopID = MIN(id)
    FROM #tempList
    WHERE id > @LoopID
   END
  COMMIT TRANSACTION
 END TRY

 BEGIN CATCH
  -- Test if the transaction is uncommittable.
  IF (XACT_STATE()) = -1
   ROLLBACK TRANSACTION
  -- Test if the transaction is active and valid.
  IF (XACT_STATE()) = 1
   COMMIT TRANSACTION
 END CATCH
END;
GO

To import the documents, use the stored procedure as follows, noting that the directory with the files resides on the same machine as SQL Server:

-- Import the schema files
EXEC db.ImportSchemaFiles @SchemaDir = 'C:\XSDs', @MajorVer = 2, @MinorVer = 9, @MaintVer = 0;
-- Done

That's all for today!