Pages

Saturday, July 21, 2018

Export Large SQL Tables into Multiple Text Files

A requirement has been arisen to export large SQL tables into a cloud storage. Nowadays, there are many ETL tools and cloud services in the market that support this process. However, in this particular scenario, the requirement was to develop a custom proc to export a given table into multiple CSV files based on a batch size (i.e. specific number of rows per file)

Following proc generates a dynamic BCP command and then execute it using xp_cmdshell system proc.
CREATE PROCEDURE [dbo].[sp_Export_Large_SQL_Tables]
 @Server  VARCHAR(200)
 , @DBName VARCHAR(200)  
 , @TableName VARCHAR(200)  
 , @Delimiter VARCHAR(200) = '|'  -- Output File Delimiter
 , @OutputExt VARCHAR(200) = 'csv' -- Output File Extension
 , @OutputDir VARCHAR(200) 
 , @LogDir VARCHAR(200) = null
 , @OrderByCol VARCHAR(200) = null
 , @BatchSize INT = 500000
AS
/**************************************************************************  
-- Name   : [dbo].[sp_Export_Large_SQL_Tables]
-- Desc   : 
-- Notes  : 
-- Dependencies         : bcp.exe
**************************************************************************  
-- Ve   Date        Author     Description  
-- --   --------    -------    --------------------------- 
-- 1    2018-05-31 Isura Silva  Created
*************************************************************************/
BEGIN
 SET NOCOUNT ON
 DECLARE @vcStep VARCHAR(max) = '' -- Store Custom error message
 DECLARE @vcSQL VARCHAR(max)  = '' -- Store execution command
 DECLARE @Proc_Name varchar(255) = OBJECT_NAME(@@PROCID) -- Automatically picks the proc name
 ------- 
 SET @LogDir = ISNULL(@LogDir,@OutputDir) ;
 SET @Server = ISNULL(@Server, 'localhost');
 SET @Delimiter = ISNULL(@Delimiter, '|');
 SET @OutputExt = ISNULL(@OutputExt, '.csv'); -- Output File Extension
 IF @OrderByCol IS NULL OR @OrderByCol = ''
  BEGIN
  SET @vcSQL = 'SELECT TOP 1 @OrderByCol = COLUMN_NAME FROM ' 
   + @DBName + '.[INFORMATION_SCHEMA].[COLUMNS] WHERE CONCAT([TABLE_SCHEMA],''' + '.' +''',[TABLE_NAME]) = ''' 
   + @TableName + ''' OR [TABLE_NAME] = ''' + @TableName + ''''
  -- PRINT @vcSQL;
  DECLARE @vcStep2 NVARCHAR(max) = CAST(@vcSQL AS NVARCHAR(MAX))
  EXEC sp_executesql @vcStep2, @params = N'@OrderByCol VARCHAR(200) OUTPUT', @OrderByCol = @OrderByCol OUTPUT
  END
 ------- 
 DECLARE @BCPexe VARCHAR(200) = CASE 
  -- WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) like '10.0%' THEN 'unknown' -- 'SQL2008'
  -- WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) like '10.5%' THEN 'unknown' -- 'SQL2008 R2'
  -- WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) like '11%' THEN 'unknown' -- 'SQL2012'
  -- WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) like '12%' THEN 'unknown' -- 'SQL2014'
  WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) like '13%' THEN 'C:\Program Files\Microsoft SQL Server\Client SDK\ODBC\130\Tools\Binn\bcp.exe' --'SQL2016'     
  WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) like '14%' THEN 'C:\Program Files\Microsoft SQL Server\Client SDK\ODBC\130\Tools\Binn\bcp.exe' --'SQL2017' 
  ELSE 'C:\Program Files\Microsoft SQL Server\Client SDK\ODBC\130\Tools\Binn\bcp.exe'
  END

 DECLARE @SQLQuery VARCHAR(8000) = '';
 DECLARE @SQLbcp  VARCHAR(8000) = '';
 DECLARE @OutputPath VARCHAR(200);
 DECLARE @LogPath VARCHAR(200);
 -------
 DECLARE @SQLRowCount NVARCHAR(4000)= N'';
 DECLARE @CurrentRowCount INT = 0;
 DECLARE @TableRowCount INT;
 BEGIN TRY 
 Print '---------------------------------------------------------------------------------------------------------------'
  -- Get the row count of SQL table
  SET @SQLRowCount = CAST(('SELECT @TableRowCount = COUNT(*) FROM ' + @DBName + '.' + @TableName) AS nvarchar(4000)) 
  EXEC sp_executesql @SQLRowCount, @params = N'@TableRowCount INT OUTPUT', @TableRowCount = @TableRowCount OUTPUT
  --PRINT @SQLRowCount
 
  WHILE (@CurrentRowCount < @TableRowCount) 
  BEGIN 
   -- Pupulate the input query for BCP command based on the batch size (default batch is 500K rows)
   SET @SQLQuery = 'SELECT * FROM ' + @DBName + '.' + @TableName + ' ORDER BY ' + quotename(@OrderByCol) + ' ASC '
   + ' OFFSET ' + CAST(@CurrentRowCount as VARCHAR(100)) + ' ROWS FETCH NEXT ' + CAST(@BatchSize AS VARCHAR(100)) 
   + ' ROWS ONLY'

  ---------------------------------------- 
   SET @OutputPath =  @OutputDir + '\' + @TableName + '_' + CAST(@CurrentRowCount as VARCHAR(100)) + '-' + CAST((@CurrentRowCount+ @BatchSize) as VARCHAR(100)) + '.csv';
   SET @LogPath = @LogDir + '\' + @TableName + '.log';
   -- Pupulate BCP command
   SET @SQLbcp = 'CALL ' +  quotename(@BCPexe, '"') + ' ' 
    + '"' + @SQLQuery + '"'
    + ' queryout ' + quotename(@OutputPath, '"')
    + ' -S ' + quotename(@Server, '"') 
    + ' -d ' + quotename(@DBName, '"') 
    + ' -t ' + quotename(@Delimiter, '"') 
    + ' -e ' + quotename(@LogPath, '"') 
    + ' -T -c -C 65001 -q '

   PRINT @SQLbcp
   DECLARE @result int;  
   
   --------
   EXEC @result = xp_cmdshell @SQLbcp , NO_OUTPUT;  
   IF (@result = 0 and (@SQLbcp <> '' OR @SQLbcp IS NOT NULL))  
      PRINT 'Success'  
   ELSE  
    PRINT 'Failure'
    PRINT @SQLbcp;

  SET @CurrentRowCount = @CurrentRowCount + @BatchSize 
  END
  ----------------------------------------
  Print '---------------------------------------------------------------------------------------------------------------'
 END TRY  
 BEGIN CATCH  
  PRINT ERROR_MESSAGE() 
        RAISERROR(@Proc_Name,16,1)
 END CATCH
END

e.g. Call procedure - TSQL
EXEC [dbo].[sp_Export_Large_SQL_Tables] @Server = 'Localhost'
 , @DBName = 'Insight'
 , @TableName = 'dbo.YOU_Detailed'
 , @Delimiter = '|' 
 , @OutputExt = 'csv'
 , @OutputDir = 'E:\Insight\Export'
 , @LogDir = null
 , @OrderByCol = null
 , @BatchSize = 500000

e.g. Call  procedure - PowerShell

$ServerInstance = "Localhost"
$Database       = "Insight"
$TableName      = "dbo.YOU_Detailed"
$OutputDir      = "E:\Insight\Export"
   
## Export SQL tables to CSV
$SQLcmd  = "EXEC [dbo].[sp_Export_Large_SQL_Tables] @Server = " + "'" + $ServerInstance + "'" `
    + ", @DBName = " + "'" + $Database + "'" `
    + ", @TableName = " + "'" + $TableName + "'" `
    + ", @Delimiter = " + "'|'" `
    + ", @OutputExt = " + "'csv'" `
    + ", @OutputDir = " + "'" + $OutputDir + "'" `
    + ", @LogDir = " + "null" `
    + ", @OrderByCol    = " + "null" `
    + ", @BatchSize = " + "500000" `
 
Invoke-Sqlcmd -ServerInstance $ServerInstance -Query $SQLcmd  -Database "Utils" -QueryTimeout 0 -verbose