Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <#
- .SYNOPSIS
- Extracts metadata from RDL (Report Definition Language) files (2016/01 schema)
- and exports it into multiple CSV files, which are then consolidated into a
- single Excel workbook with separate sheets for each data category.
- .DESCRIPTION
- This script parses RDL XML files to extract various metadata, including:
- - Report-level properties (Name, Description, Author, Page Size, etc.)
- - Data Source definitions (Name, Connection String, Type, Security)
- - Dataset definitions (Name, Data Source, Command Text, Fields and their types)
- - Report Parameter definitions (Name, Data Type, Prompt, Multi-Value, Allow Blank,
- Hidden, Nullable, Default Values, and Valid Values source)
- - Report Section details (Body dimensions, Page Margins)
- The extracted data is first saved to individual CSV files for each category
- and then combined into a single, timestamped Excel (.xlsx) file, with each
- category residing on its own sheet.
- The script includes:
- - Robust XML parsing using XmlNamespaceManager for RDL 2016/01 schema.
- - Automatic installation of the 'ImportExcel' PowerShell module if not found.
- - Comprehensive metadata extraction for the specified elements.
- - Export to individual CSV files for structured output.
- - Conversion of CSVs to a single Excel workbook with named sheets.
- - Memory cleanup routines to dispose of large objects.
- .PARAMETER Path
- Specifies the root directory containing the RDL files to process.
- The script will recursively search for .rdl files within this path.
- .EXAMPLE
- .\Get-RDLMetadata2XLS.ps1
- Prompts for RDL folder path and output folder path, then extracts and
- exports metadata to Excel.
- .EXAMPLE
- # Directly specify paths (uncomment and modify in script)
- # $rdlFolderPath = "C:\MyReports"
- # $outputFolderPath = "C:\RDL_Output"
- # Get-RDLMetadata2XLS -Path $rdlFolderPath -OutputDirectory $outputFolderPath
- .NOTES
- Author: Gemini (AI)
- Date: June 3, 2025
- Version: 1.0
- This script requires internet access for automatic 'ImportExcel' module installation.
- Ensure you have appropriate permissions to install modules and write to specified directories.
- #>
- # Function to extract RDL metadata
- function Get-RDLMetadata {
- param(
- [Parameter(Mandatory=$true)]
- [string]$Path
- )
- $rdlFiles = Get-ChildItem -Path $Path -Filter "*.rdl" -Recurse
- if (-not $rdlFiles) {
- Write-Warning "No RDL files found in '$Path'."
- return @{
- ReportProperties = @()
- DataSources = @()
- Datasets = @()
- Parameters = @()
- ReportSections = @()
- }
- }
- # Initialize collections for each output type
- $allReportProperties = @()
- $allDataSources = @()
- $allDatasets = @()
- $allParameters = @()
- $allReportSections = @()
- foreach ($rdlFile in $rdlFiles) {
- Write-Host "Processing $($rdlFile.FullName)..."
- # Use a nested try-finally to ensure XML object is disposed
- $rdlContent = $null # Initialize to null
- try {
- [xml]$rdlContent = Get-Content $rdlFile.FullName -Encoding UTF8
- # --- Create an XmlNamespaceManager ---
- $nsManager = New-Object System.Xml.XmlNamespaceManager($rdlContent.NameTable)
- $nsManager.AddNamespace("r", "http://schemas.microsoft.com/sqlserver/reporting/2016/01/reportdefinition")
- $nsManager.AddNamespace("rd", "http://schemas.microsoft.com/SQLServer/reporting/reportdesigner")
- # ------------------------------------------------
- $fileName = $rdlFile.Name
- $fullPath = $rdlFile.FullName
- # --- Report Properties ---
- $reportNameNode = $rdlContent.SelectSingleNode("/r:Report/r:Name", $nsManager)
- $reportName = if ($reportNameNode) { $reportNameNode.InnerXml } else { $fileName.Replace(".rdl", "") } # Fallback to file name without extension
- $descriptionNode = $rdlContent.SelectSingleNode("/r:Report/r:Description", $nsManager)
- $description = if ($descriptionNode) { $descriptionNode.InnerXml } else { $null }
- $authorNode = $rdlContent.SelectSingleNode("/r:Report/r:Author", $nsManager)
- $author = if ($authorNode) { $authorNode.InnerXml } else { $null }
- $widthNode = $rdlContent.SelectSingleNode("/r:Report/r:Width", $nsManager)
- $width = if ($widthNode) { $widthNode.InnerXml } else { $null }
- $pageHeightNode = $rdlContent.SelectSingleNode("/r:Report/r:Page/r:PageHeight", $nsManager)
- $height = if ($pageHeightNode) { $pageHeightNode.InnerXml } else { $null }
- $languageNode = $rdlContent.SelectSingleNode("/r:Report/r:Language", $nsManager)
- $language = if ($languageNode) { $languageNode.InnerXml } else { $null }
- $consumeWhitespaceNode = $rdlContent.SelectSingleNode("/r:Report/r:ConsumeContainerWhitespace", $nsManager)
- $consumeWhitespace = if ($consumeWhitespaceNode) { [bool]$consumeWhitespaceNode.InnerXml } else { $null }
- $rdlIDNode = $rdlContent.SelectSingleNode("/r:Report/rd:ReportID", $nsManager) # Designer specific ID
- $rdlID = if ($rdlIDNode) { $rdlIDNode.InnerXml } else { $null }
- $reportUnitTypeNode = $rdlContent.SelectSingleNode("/r:Report/rd:ReportUnitType", $nsManager) # Designer specific Unit Type
- $reportUnitType = if ($reportUnitTypeNode) { $reportUnitTypeNode.InnerXml } else { $null }
- $allReportProperties += [PSCustomObject]@{
- FileName = $fileName
- FullPath = $fullPath
- ReportName = $reportName
- Description = $description
- Author = $author
- Width = $width
- PageHeight = $height
- Language = $language
- ConsumeContainerWhitespace = $consumeWhitespace
- ReportID = $rdlID
- ReportUnitType = $reportUnitType
- }
- # --- Data Sources ---
- $rdlContent.SelectNodes("//r:DataSource", $nsManager) | ForEach-Object {
- $dsName = $_.Name
- $dataSourceReferenceNode = $_.SelectSingleNode("r:DataSourceReference", $nsManager)
- $connectionString = if ($dataSourceReferenceNode) { $dataSourceReferenceNode.InnerXml } else { $null }
- if (-not $connectionString) { # If DataSourceReference is not present, check ConnectionProperties
- $connectionStringNode = $_.ConnectionProperties.SelectSingleNode("r:ConnectString", $nsManager)
- $connectionString = if ($connectionStringNode) { $connectionStringNode.InnerXml } else { $null }
- }
- $dsTypeNode = $_.ConnectionProperties.SelectSingleNode("r:DataProvider", $nsManager)
- $dsType = if ($dsTypeNode) { $dsTypeNode.InnerXml } else { $null }
- $securityTypeNode = $_.SelectSingleNode("rd:SecurityType", $nsManager)
- $securityType = if ($securityTypeNode) { $securityTypeNode.InnerXml } else { $null }
- $promptNode = $_.SelectSingleNode("r:Prompt", $nsManager)
- $prompt = if ($promptNode) { $promptNode.InnerXml } else { $null }
- $allDataSources += [PSCustomObject]@{
- FileName = $fileName
- ReportName = $reportName
- DataSourceName = $dsName
- ConnectionString = $connectionString
- Type = $dsType
- SecurityType = $securityType
- Prompt = $prompt
- }
- }
- # --- Datasets and their Fields ---
- $rdlContent.SelectNodes("//r:DataSet", $nsManager) | ForEach-Object {
- $datasetName = $_.Name
- $dataSourceNameNode = $_.Query.SelectSingleNode("r:DataSourceName", $nsManager)
- $dataSourceName = if ($dataSourceNameNode) { $dataSourceNameNode.InnerXml } else { $null }
- $commandTextNode = $_.Query.SelectSingleNode("r:CommandText", $nsManager)
- $commandText = if ($commandTextNode) { $commandTextNode.InnerXml } else { $null }
- $commandTypeNode = $_.Query.SelectSingleNode("r:CommandType", $nsManager)
- $commandType = if ($commandTypeNode) { $commandTypeNode.InnerXml } else { "Text" } # Default is Text
- $timeoutNode = $_.Query.SelectSingleNode("r:Timeout", $nsManager)
- $timeout = if ($timeoutNode) { [int]$timeoutNode.InnerXml } else { $null }
- $caseSensitivityNode = $_.SelectSingleNode("r:CaseSensitivity", $nsManager)
- $caseSensitivity = if ($caseSensitivityNode) { [bool]$caseSensitivityNode.InnerXml } else { $null }
- # Fields within this dataset
- $fieldsFound = $false
- $_.SelectNodes("r:Fields/r:Field", $nsManager) | ForEach-Object {
- $fieldsFound = $true
- $fieldName = $_.Name
- $dataFieldNode = $_.SelectSingleNode("r:DataField", $nsManager)
- $dataField = if ($dataFieldNode) { $dataFieldNode.InnerXml } else { $null }
- $typeNameNode = $_.SelectSingleNode("rd:TypeName", $nsManager) # rd:TypeName is often for designer info
- $typeName = if ($typeNameNode) { $typeNameNode.InnerXml } else { $null }
- $dataTypeNode = $_.SelectSingleNode("r:rd:DataType", $nsManager) # Some fields might have this
- $dataType = if ($dataTypeNode) { $dataTypeNode.InnerXml } else { $null }
- $allDatasets += [PSCustomObject]@{
- FileName = $fileName
- ReportName = $reportName
- DataSetName = $datasetName
- DataSourceName = $dataSourceName
- CommandText = $commandText
- CommandType = $commandType
- Timeout = $timeout
- CaseSensitivity = $caseSensitivity
- FieldName = $fieldName
- DataField = $dataField
- FieldTypeName = $typeName
- FieldDataType = $dataType
- }
- }
- # If a dataset has no fields but we still want to log it (e.g., empty datasets)
- if (-not $fieldsFound) {
- $allDatasets += [PSCustomObject]@{
- FileName = $fileName
- ReportName = $reportName
- DataSetName = $datasetName
- DataSourceName = $dataSourceName
- CommandText = $commandText
- CommandType = $commandType
- Timeout = $timeout
- CaseSensitivity = $caseSensitivity
- FieldName = $null
- DataField = $null
- FieldTypeName = $null
- FieldDataType = $null
- }
- }
- }
- # --- Report Parameters ---
- $rdlContent.SelectNodes("//r:ReportParameter", $nsManager) | ForEach-Object {
- $paramName = $_.Name
- $paramDataTypeNode = $_.SelectSingleNode("r:DataType", $nsManager)
- $paramDataType = if ($paramDataTypeNode) { $paramDataTypeNode.InnerXml } else { $null }
- $paramPromptNode = $_.SelectSingleNode("r:Prompt", $nsManager)
- $paramPrompt = if ($paramPromptNode) { $paramPromptNode.InnerXml } else { $null }
- $paramMultiValueNode = $_.SelectSingleNode("r:MultiValue", $nsManager)
- $paramMultiValue = if ($paramMultiValueNode) { [bool]$paramMultiValueNode.InnerXml } else { $false }
- $paramAllowBlankNode = $_.SelectSingleNode("r:AllowBlank", $nsManager)
- $paramAllowBlank = if ($paramAllowBlankNode) { [bool]$paramAllowBlankNode.InnerXml } else { $false }
- $paramHiddenNode = $_.SelectSingleNode("r:Hidden", $nsManager)
- $paramHidden = if ($paramHiddenNode) { [bool]$paramHiddenNode.InnerXml } else { $false }
- $paramNullableNode = $_.SelectSingleNode("r:Nullable", $nsManager)
- $paramNullable = if ($paramNullableNode) { [bool]$paramNullableNode.InnerXml } else { $false }
- # Default values (can be multiple)
- $defaultValues = @()
- $_.SelectNodes("r:DefaultValues/r:Value", $nsManager) | ForEach-Object {
- $defaultValues += $_.InnerXml
- }
- $defaultValuesText = if ($defaultValues.Count -gt 0) { $defaultValues -join ';' } else { $null }
- # Valid values from DatasetReference or Static Values
- $paramValuesSource = $null
- $validValuesDataSetRef = $_.ValidValues.SelectSingleNode("r:DataSetReference", $nsManager)
- if ($validValuesDataSetRef) {
- $refDataSetNameNode = $validValuesDataSetRef.SelectSingleNode("r:DataSetName", $nsManager)
- $refValueFieldNode = $validValuesDataSetRef.SelectSingleNode("r:ValueField", $nsManager)
- $refLabelFieldNode = $validValuesDataSetRef.SelectSingleNode("r:LabelField", $nsManager)
- $paramValuesSource = "DataSet: $($refDataSetNameNode.InnerXml), ValueField: $($refValueFieldNode.InnerXml), LabelField: $($refLabelFieldNode.InnerXml)"
- }
- $staticValidValues = @()
- $_.SelectNodes("r:ValidValues/r:ParameterValues/r:ParameterValue/r:Value", $nsManager) | ForEach-Object {
- $staticValidValues += $_.InnerXml
- }
- if ($staticValidValues.Count -gt 0) {
- if ($paramValuesSource) {
- $paramValuesSource += "; Static Values: " + ($staticValidValues -join ';')
- } else {
- $paramValuesSource = "Static Values: " + ($staticValidValues -join ';')
- }
- }
- $allParameters += [PSCustomObject]@{
- FileName = $fileName
- ReportName = $reportName
- ParameterName = $paramName
- DataType = $paramDataType
- Prompt = $paramPrompt
- MultiValue = $paramMultiValue
- AllowBlank = $paramAllowBlank
- Hidden = $paramHidden
- Nullable = $paramNullable
- DefaultValues = $defaultValuesText
- ValuesSource = $paramValuesSource
- }
- }
- # --- Report Sections (simplified, focusing on containers) ---
- $rdlContent.SelectNodes("/r:Report/r:ReportSections/r:ReportSection", $nsManager) | ForEach-Object {
- $sectionIndex = ($allReportSections | Where-Object { $_.FileName -eq $fileName -and $_.ReportName -eq $reportName }).Count + 1
- $sectionName = $_.Name
- if (-not $sectionName) { $sectionName = "Section_$sectionIndex" }
- $bodyHeightNode = $_.Body.SelectSingleNode("r:Height", $nsManager)
- $bodyHeight = if ($bodyHeightNode) { $bodyHeightNode.InnerXml } else { $null }
- $bodyWidthNode = $_.Body.SelectSingleNode("r:Width", $nsManager)
- $bodyWidth = if ($bodyWidthNode) { $bodyWidthNode.InnerXml } else { $null }
- $pageMargins = $_.Page
- $leftMargin = if ($pageMargins) { $pageMargins.SelectSingleNode("r:LeftMargin", $nsManager).InnerXml } else { $null }
- $rightMargin = if ($pageMargins) { $pageMargins.SelectSingleNode("r:RightMargin", $nsManager).InnerXml } else { $null }
- $topMargin = if ($pageMargins) { $pageMargins.SelectSingleNode("r:TopMargin", $nsManager).InnerXml } else { $null }
- $bottomMargin = if ($pageMargins) { $pageMargins.SelectSingleNode("r:BottomMargin", $nsManager).InnerXml } else { $null }
- $allReportSections += [PSCustomObject]@{
- FileName = $fileName
- ReportName = $reportName
- SectionName = $sectionName
- BodyHeight = $bodyHeight
- BodyWidth = $bodyWidth
- PageLeftMargin = $leftMargin
- PageRightMargin = $rightMargin
- PageTopMargin = $topMargin
- PageBottomMargin = $bottomMargin
- }
- }
- }
- catch {
- Write-Error "Error processing $($rdlFile.FullName): $($_.Exception.Message)"
- }
- finally {
- # Explicitly dispose of the XML document to free up memory
- if ($rdlContent -is [System.IDisposable]) {
- $rdlContent.Dispose()
- $rdlContent = $null # Clear the variable
- }
- # Other objects are typically managed by PowerShell's garbage collection,
- # but setting variables to null can help for large collections.
- $nsManager = $null
- }
- }
- # Return a hashtable of all collected data
- return @{
- ReportProperties = $allReportProperties
- DataSources = $allDataSources
- Datasets = $allDatasets
- Parameters = $allParameters
- ReportSections = $allReportSections
- }
- }
- # --- Main Script Execution ---
- #region Prerequisites and Setup
- #region Install ImportExcel Module
- # Check if ImportExcel module is installed
- if (-not (Get-Module -ListAvailable -Name ImportExcel)) {
- Write-Host "The 'ImportExcel' module is not found. Attempting to install..."
- try {
- Install-Module -Name ImportExcel -Scope CurrentUser -Force -AllowClobber
- Write-Host "The 'ImportExcel' module was installed successfully."
- }
- catch {
- Write-Error "Failed to install the 'ImportExcel' module. Please install it manually by running:"
- Write-Error " Install-Module -Name ImportExcel -Scope CurrentUser -Force -AllowClobber"
- Write-Error "Error details: $($_.Exception.Message)"
- exit 1 # Exit script if module cannot be installed
- }
- }
- # Import the module (even if already installed, this ensures it's loaded for the script's scope)
- Import-Module ImportExcel -Force -Global:$true
- #endregion
- # Prompt for RDL folder path
- $rdlFolderPath = Read-Host "Enter the path to your RDL files (e.g., C:\MyReports)"
- while (-not (Test-Path $rdlFolderPath -PathType Container)) {
- Write-Warning "The specified RDL folder path does not exist or is not a directory. Please try again."
- $rdlFolderPath = Read-Host "Enter the path to your RDL files (e.g., C:\MyReports)"
- }
- # Prompt for output folder path
- $outputFolderPath = Read-Host "Enter the desired output folder path for CSVs and Excel (e.g., C:\RDL_Output)"
- if (-not (Test-Path $outputFolderPath -PathType Container)) {
- try {
- New-Item -ItemType Directory -Path $outputFolderPath -Force | Out-Null
- Write-Host "Created output directory: $outputFolderPath"
- }
- catch {
- Write-Error "Failed to create output directory: $($_.Exception.Message). Please ensure you have write permissions."
- exit 1
- }
- }
- Write-Host "Starting RDL metadata extraction from: $rdlFolderPath"
- #endregion
- #region Extraction and CSV Export
- $allExtractedData = Get-RDLMetadata -Path $rdlFolderPath
- $csvFilePaths = @() # To store paths of generated CSVs for later Excel conversion
- # Export Report Properties
- if ($allExtractedData.ReportProperties.Count -gt 0) {
- $reportPropertiesCsvPath = Join-Path $outputFolderPath "RDL_ReportProperties.csv"
- $allExtractedData.ReportProperties | Export-Csv -Path $reportPropertiesCsvPath -NoTypeInformation -Encoding UTF8
- $csvFilePaths += $reportPropertiesCsvPath
- Write-Host "Exported Report Properties to: $reportPropertiesCsvPath"
- } else {
- Write-Warning "No Report Properties extracted."
- }
- # Export Data Sources
- if ($allExtractedData.DataSources.Count -gt 0) {
- $dataSourcesCsvPath = Join-Path $outputFolderPath "RDL_DataSources.csv"
- $allExtractedData.DataSources | Export-Csv -Path $dataSourcesCsvPath -NoTypeInformation -Encoding UTF8
- $csvFilePaths += $dataSourcesCsvPath
- Write-Host "Exported Data Sources to: $dataSourcesCsvPath"
- } else {
- Write-Warning "No Data Sources extracted."
- }
- # Export Datasets and Fields
- if ($allExtractedData.Datasets.Count -gt 0) {
- $datasetsCsvPath = Join-Path $outputFolderPath "RDL_Datasets.csv"
- $allExtractedData.Datasets | Export-Csv -Path $datasetsCsvPath -NoTypeInformation -Encoding UTF8
- $csvFilePaths += $datasetsCsvPath
- Write-Host "Exported Datasets and Fields to: $datasetsCsvPath"
- } else {
- Write-Warning "No Datasets or Fields extracted."
- }
- # Export Parameters
- if ($allExtractedData.Parameters.Count -gt 0) {
- $parametersCsvPath = Join-Path $outputFolderPath "RDL_Parameters.csv"
- $allExtractedData.Parameters | Export-Csv -Path $parametersCsvPath -NoTypeInformation -Encoding UTF8
- $csvFilePaths += $parametersCsvPath
- Write-Host "Exported Parameters to: $parametersCsvPath"
- } else {
- Write-Warning "No Parameters extracted."
- }
- # Export Report Sections
- if ($allExtractedData.ReportSections.Count -gt 0) {
- $reportSectionsCsvPath = Join-Path $outputFolderPath "RDL_ReportSections.csv"
- $allExtractedData.ReportSections | Export-Csv -Path $reportSectionsCsvPath -NoTypeInformation -Encoding UTF8
- $csvFilePaths += $reportSectionsCsvPath
- Write-Host "Exported Report Sections to: $reportSectionsCsvPath"
- } else {
- Write-Warning "No Report Sections extracted."
- }
- #endregion
- #region CSV to Excel Conversion
- if ($csvFilePaths.Count -gt 0) {
- Write-Host "`nConverting CSV files to Excel..."
- $excelFileName = "RDL_Metadata_$(Get-Date -Format 'yyyyMMdd_HHmmss').xlsx"
- $excelFilePath = Join-Path $outputFolderPath $excelFileName
- $excelPackage = $null # Initialize for disposal
- try {
- # Create a new Excel file
- $excelPackage = New-ExcelPackage -Path $excelFilePath
- foreach ($csvPath in $csvFilePaths) {
- $sheetName = [System.IO.Path]::GetFileNameWithoutExtension($csvPath).Replace("RDL_", "") # Get sheet name from CSV filename
- Write-Host " Adding '$sheetName' data to Excel sheet..."
- $data = Import-Csv -Path $csvPath -Encoding UTF8
- $data | Export-Excel -ExcelPackage $excelPackage -WorksheetName $sheetName -AutoFilter -AutoSize -ClearSheet -PassThru | Out-Null
- $data = $null # Clear data after export
- }
- Close-ExcelPackage -ExcelPackage $excelPackage -Show:$false # Save and close the Excel file without opening it
- Write-Host "Successfully converted CSVs to Excel: $excelFilePath"
- # Optional: Remove CSV files after conversion
- # Write-Host "Removing temporary CSV files..."
- # $csvFilePaths | Remove-Item -ErrorAction SilentlyContinue -Force
- # Write-Host "Temporary CSV files removed."
- }
- catch {
- Write-Error "An error occurred during CSV to Excel conversion: $($_.Exception.Message)"
- }
- finally {
- # Dispose of the Excel package object
- if ($excelPackage -is [System.IDisposable]) {
- $excelPackage.Dispose()
- $excelPackage = $null
- }
- }
- } else {
- Write-Warning "No CSV files were generated, so Excel conversion is skipped."
- }
- #endregion
- #region Memory Cleanup
- Write-Host "`nInitiating memory cleanup..."
- # Clear variables holding large data collections and file lists
- $allExtractedData = $null
- $allReportProperties = $null
- $allDataSources = $null
- $allDatasets = $null
- $allParameters = $null
- $allReportSections = $null
- $rdlFiles = $null
- $csvFilePaths = $null
- $rdlFolderPath = $null
- $outputFolderPath = $null
- # Force garbage collection (use with caution, as it can be resource-intensive, but useful for large scripts)
- # [System.GC]::Collect()
- # [System.GC]::WaitForPendingFinalizers()
- Write-Host "Memory cleanup routines completed."
- #endregion
- Write-Host "`nRDL metadata extraction and export process finished."
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement