-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathCheckpoint-PowerBIScannerApiData.ps1
193 lines (154 loc) · 7.52 KB
/
Checkpoint-PowerBIScannerApiData.ps1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
<#
.SYNOPSIS
Exports latest data from the Power BI Scanner API to a JSON file.
.DESCRIPTION
This script will get the latest available data from the Power BI Scanner API and write it to a .json file.
By default, the file will be saved in the user's Downloads folder with a timestamp in the filename.
The user can specify a custom path for the file to be saved by passing the -OutFile parameter.
The user can also specify to open the file in the default application after it's created by passing the -OpenFile switch.
.PARAMETER OutFile
The destination path for the JSON file. Defaults to "~\Downloads\PowerBIScannerApiData_{timestamp}.json"
.PARAMETER OpenFile
Specify to open the JSON file in the default application after it's created.
.PARAMETER BatchSize
The number of workspaces to scan in each batch. Valid values are 1-100. Defaults to 100.
.PARAMETER Organization
The name of the organization for the REST API call. Defaults to 'myorg'.
.INPUTS
- Parameters are currently the only way to pass input to this script
- Pipeline inputs are not yet supported
.OUTPUTS
- A .json file containing all available data from the Power BI Scanner API
- Pipeline outputs are not yet supported
.EXAMPLE
# Export data to "C:\temp\MyPowerBIScannerApiData.json"
.\Checkpoint-PowerBIScannerApiData.ps1 -OutFile "C:\temp\MyPowerBIScannerApiData.json"
.EXAMPLE
# Export data to the default location ("~\Downloads\PowerBIScannerApiData_{timestamp}.json")
# and open the file in the system's default .json file handler/editor
.\Checkpoint-PowerBIScannerApiData.ps1 -OpenFile
.NOTES
- Requires the Power BI Management module: https://docs.microsoft.com/en-us/powershell/power-bi/overview?view=powerbi-ps
- Requires the Power BI Scanner API to be enabled: https://learn.microsoft.com/en-us/power-bi/enterprise/service-admin-metadata-scanning#enabling-enhanced-metadata-scanning
- Currently only works with Power BI workspaces that have been modified in the last 30 days
- May not work properly in Power BI tenants with a lot of active workspaces (due to API rate limits)
ACKNOWLEDGEMENTS
- Thanks to my wife (@[email protected]) for her support and encouragement.
- Thanks to the PowerShell and Power BI/Fabric communities for being so awesome.
.LINK
[Source code](https://github.com/JamesDBartlett3/ps-for-pbi/blob/main/Checkpoint-PowerBIScannerApiData.ps1)
.LINK
[The author's blog](https://datavolume.xyz)
.LINK
[Follow the author on LinkedIn](https://www.linkedin.com/in/jamesdbartlett3/)
.LINK
[Follow the author on Mastodon](https://techhub.social/@JamesDBartlett3)
.LINK
[Follow the author on BlueSky](https://bsky.app/profile/jamesdbartlett3.bsky.social)
#>
Param(
[Parameter(Mandatory = $false)]
[string]$OutFile = "$HOME\Downloads\PowerBIScannerApiData_$(Get-Date -UFormat '%Y-%m-%d_%H%M').json",
[Parameter(Mandatory = $false)]
[switch]$OpenFile,
[Parameter(Mandatory = $false)][ValidateRange(1, 100)]
[int]$BatchSize = 100,
[Parameter(Mandatory = $false)]
[string]$Organization = 'myorg'
)
#Requires -Modules MicrosoftPowerBIMgmt
# Declare starting variables
[string]$baseUrl = "https://api.powerbi.com/v1.0/$Organization/admin/workspaces"
[string]$modifiedWorkspacesUrl = "$baseUrl/modified?excludePersonalWorkspaces=True"
[string]$getInfoUrl = "$baseUrl/getInfo?lineage=True&datasourceDetails=True&datasetSchema=True&datasetExpressions=True&getArtifactUsers=True"
$headers = [System.Collections.Generic.Dictionary[[String], [String]]]::New()
$scanResults = [PSCustomObject]@{
workspaces = @()
datasourceInstances = @()
misconfiguredDatasourceInstances = @()
}
try {
$headers = Get-PowerBIAccessToken
}
catch {
Write-Host '🔒 Power BI Access Token required. Launching Azure Active Directory authentication dialog...'
Start-Sleep -s 1
Connect-PowerBIServiceAccount -WarningAction SilentlyContinue | Out-Null
$headers = Get-PowerBIAccessToken
if ($headers) {
Write-Host '🔑 Power BI Access Token acquired. Proceeding...'
} else {
Write-Host '❌ Power BI Access Token not acquired. Exiting...'
Exit
}
}
# Send a GET request to the modified workspaces endpoint
$workspaceList = Invoke-RestMethod -Uri $modifiedWorkspacesUrl -Method Get -Headers $headers
# Create an object to hold the workspace IDs
$workspaceIdsObject = [PSCustomObject]@{
workspaces = @()
}
# Add the workspace IDs to the object
foreach ($w in $workspaceList) {
$workspaceIdsObject.workspaces += $w.id
}
# Get the number of workspaces
[int]$workspaceCount = $workspaceIdsObject.workspaces.Count
# Declare a variable to hold the workspace suffix (singular or plural)
[string]$workspaceSuffix = if ($workspaceCount -eq 1) {''} else {'s'}
# If no workspaces were found, exit the script
if ($workspaceCount -eq 0) {
Write-Host 'No workspaces found. Exiting...'
Exit
}
# Calculate the number of batches to run based on the number of workspaces and the batch size
[int]$batchesToRun = [Math]::Ceiling($workspaceCount / $batchSize)
# Declare a variable to hold the batch suffix (singular or plural)
[string]$batchSuffx = if ($batchesToRun -eq 1) {''} else {'es'}
Write-Host "Found $($workspaceCount) workspace$workspaceSuffix. Running $batchesToRun batch$batchSuffx of $batchSize..."
Write-Host '----------------------------------'
# Loop through the workspaces in batches of $batchSize, get the scanner API data for each batch, and add it to the $scanResults object
for ($i = 0; $i -lt $batchesToRun; $i++) {
$batchNum = $i + 1
Write-Host "Running batch $batchNum of $batchesToRun..."
$batchStart = $i * $batchSize
$batchEnd = $batchStart + $batchSize
$batch = $workspaceIdsObject.workspaces[$batchStart..$batchEnd]
$batchObject = [PSCustomObject]@{
workspaces = $batch
}
$jsonBody = $batchObject | ConvertTo-Json
# Send a POST request to the API endpoint
$startScanResponse = Invoke-RestMethod -Uri $getInfoUrl -Method Post -Headers $headers -Body $jsonBody -ContentType 'application/json'
$scanId = $startScanResponse.id
$scanStatusUrl = "$baseUrl/scanStatus/$scanId"
$scanStatus = ''
# Check the scan status every 5 seconds until it's complete
while ($scanStatus -ne 'Succeeded') {
if ($scanStatus -eq 'Running') {
Start-Sleep -s 5
}
$checkScanResponse = Invoke-RestMethod -Uri $scanStatusUrl -Method Get -Headers $headers
$scanStatus = $checkScanResponse.status
Write-Host "Batch $batchNum status: $scanStatus"
}
Write-Host "Batch $batchNum complete. Getting data..."
Write-Host '----------------------------------'
$completedScanId = $checkScanResponse.id
$batchResultUrl = "$baseUrl/scanResult/$completedScanId"
# Send a GET request to the API endpoint
$batchResult = Invoke-RestMethod -Uri $batchResultUrl -Method Get -Headers $headers
# Add the result to the $scanResults object
$scanResults.workspaces += $batchResult.workspaces
$scanResults.datasourceInstances += $batchResult.datasourceInstances
$scanResults.misconfiguredDatasourceInstances += $batchResult.misconfiguredDatasourceInstances
}
Write-Host "Finished scanning $workspaceCount workspace$workspaceSuffix ($batchesToRun batch$batchSuffx of $BatchSize)."
Write-Host "Writing data to file: $OutFile"
# Write the data to $OutFile
$scanResults | ConvertTo-Json -Depth 100 | Out-File -FilePath $OutFile
# Open the file in the default application if user passed the -OpenFile switch
if ($OpenFile) {
Write-Host 'Opening file...'
Invoke-Item $OutFile
}