-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit ebf610d
Showing
9 changed files
with
294 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
config.json | ||
tmp/ | ||
dump.db |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# frozen_string_literal: true | ||
|
||
source "https://rubygems.org" | ||
|
||
gem "sqlite3" | ||
gem "mini_sql" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
GEM | ||
remote: https://rubygems.org/ | ||
specs: | ||
mini_sql (1.4.0) | ||
sqlite3 (1.6.2-x86_64-linux) | ||
|
||
PLATFORMS | ||
x86_64-linux | ||
|
||
DEPENDENCIES | ||
mini_sql | ||
sqlite3 | ||
|
||
BUNDLED WITH | ||
2.4.7 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
### Public Data Dump for your forum | ||
|
||
This repo attempts to establish a pattern for a public data dump. It includes 2 data explorer queries you can use to export all your public data. | ||
|
||
Public data is defined as forum topics and posts that anonymous users can access. | ||
|
||
### How to use this? | ||
|
||
First you need to define 2 queries using data explorer: | ||
|
||
1. Topic query: [here](topic_query.sql) | ||
2. Post query: [here](post_query.sql) | ||
|
||
Once defined, note the Data Explorer query id of each query, as shown in its URL. | ||
|
||
Next, define an API key with rights to run the 2 queries. | ||
|
||
### config.json | ||
|
||
Create a [config.json](config.json.sample) specifying the domain of your Discourse site, the API username, the API key and the two Data Explorer query ids. | ||
|
||
### Importing the site into Sqlite | ||
|
||
The first phase of the import is importing the site into a sqlite3 db. This intermediary db stores all the content. | ||
|
||
Run: `ruby download_topics.rb` | ||
|
||
### Importing the Sqlite db into Discourse | ||
|
||
1. Start with a blank DB | ||
2. ... (in progress) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"api_username": "USER", | ||
"api_key": "API_KEY", | ||
"topics_query_id": "QUERY_ID", | ||
"posts_query_id": "QUERY_ID", | ||
"domain": "YOUR_DISCOURSE.com" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
require "json" | ||
require "net/http" | ||
require "uri" | ||
require "sqlite3" | ||
require "mini_sql" | ||
require "cgi" | ||
|
||
# Load the connection settings from config.json (config.json.sample lists the expected keys).
begin
  config = JSON.parse(File.read("config.json"))
rescue StandardError => e
  # Nothing below can work without the settings, so stop here with a clear
  # message instead of failing later on a nil config.
  abort "Please create a file called config.json (see config.json.sample) " \
          "with your domain, API key, API username and query ids (#{e.message})"
end

# Every setting is needed to build the API requests; report all missing ones at once.
missing_keys =
  %w[domain api_key api_username topics_query_id posts_query_id].reject do |key|
    config[key]
  end
abort "config.json is missing: #{missing_keys.join(", ")}" unless missing_keys.empty?

# Replace these values with your Discourse instance details
DISCOURSE_DOMAIN = config["domain"]
API_KEY = config["api_key"]
API_USERNAME = config["api_username"]
TOPIC_QUERY_ID = config["topics_query_id"]
POST_QUERY_ID = config["posts_query_id"]

# Intermediary SQLite database that receives the downloaded rows.
sqlite_conn = SQLite3::Database.new("dump.db")
conn = MiniSql::Connection.get(sqlite_conn)
|
||
# Runs a Data Explorer query through the Discourse admin API and returns the
# decoded JSON response.
#
# @param query_id [String, Integer] id of the Data Explorer query to run
# @param min_id [Integer] sent to the query as its min_id param (serialised as a string)
# @param limit [Integer] value of the limit URL parameter
# @return [Object] the parsed JSON body of the response
# @raise [RuntimeError] when the server does not answer with a 2xx status
def run_report(query_id:, min_id: 0, limit:)
  params = CGI.escape({ min_id: min_id.to_s }.to_json)

  # The query parameters travel in the URL as an escaped JSON object.
  uri =
    URI(
      "https://#{DISCOURSE_DOMAIN}/admin/plugins/explorer/queries/#{query_id}/run?limit=#{limit}&params=#{params}"
    )
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true

  request = Net::HTTP::Post.new(uri.request_uri)
  request["Content-Type"] = "application/json"
  request["Api-Key"] = API_KEY
  request["Api-Username"] = API_USERNAME

  response = http.request(request)

  # Fail loudly on errors (bad key, rate limit, ...) rather than handing an
  # error payload to the loaders.
  unless response.is_a?(Net::HTTPSuccess)
    raise "Query #{query_id} failed with HTTP #{response.code}: #{response.body.to_s[0, 200]}"
  end

  JSON.parse(response.body)
end
|
||
# Creates the topics, users and posts tables of the intermediary database
# unless they already exist.
#
# @param conn [#exec] connection used to run the three CREATE TABLE statements
def create_schema(conn)
  topics_ddl = <<-SQL
    CREATE TABLE IF NOT EXISTS topics (
      id INTEGER PRIMARY KEY,
      category,
      title,
      created_at,
      user_id,
      tags
    )
  SQL

  users_ddl = <<-SQL
    CREATE TABLE IF NOT EXISTS users(
      id INTEGER PRIMARY KEY,
      username,
      name
    )
  SQL

  posts_ddl = <<-SQL
    CREATE TABLE IF NOT EXISTS posts(
      id INTEGER PRIMARY KEY,
      raw,
      post_number,
      topic_id,
      user_id,
      created_at
    )
  SQL

  # Same order as before: topics, then users, then posts.
  [topics_ddl, users_ddl, posts_ddl].each { |ddl| conn.exec(ddl) }
end
|
||
# Inserts one page of post rows into the posts table inside a single
# transaction. Rows whose id already exists are ignored by the INSERT.
#
# @param conn [#exec] connection whose exec takes SQL followed by positional bind values
# @param rows [Array<Array>] rows ordered as id, raw, post_number, topic_id, user_id, created_at
# @return [Hash] :highest_id (largest id seen, 0 for an empty page) and
#   :posts_loaded (number of rows processed)
# @raise [StandardError] re-raises any insert error after rolling the transaction back
def load_posts(conn, rows)
  highest_id = 0
  posts_loaded = 0

  conn.exec "BEGIN TRANSACTION"

  begin
    rows.each do |row|
      conn.exec <<~SQL, *row
        INSERT OR IGNORE INTO posts (id, raw, post_number, topic_id, user_id, created_at)
        VALUES (?, ?, ?, ?, ?, ?)
      SQL
      posts_loaded += 1
      highest_id = row[0] if row[0] > highest_id
    end
  rescue StandardError
    # Do not leave the transaction open on the connection when a row fails.
    conn.exec "ROLLBACK TRANSACTION"
    raise
  end

  conn.exec "COMMIT TRANSACTION"

  { highest_id: highest_id, posts_loaded: posts_loaded }
end
|
||
# Inserts one page of topic rows into the topics table inside a single
# transaction. Rows whose id already exists are ignored by the INSERT.
#
# @param conn [#exec] connection whose exec takes SQL followed by positional bind values
# @param rows [Array<Array>] rows ordered as id, category, title, created_at, user_id, tags
# @return [Hash] :highest_id (largest id seen, 0 for an empty page) and
#   :topics_loaded (number of rows processed)
# @raise [StandardError] re-raises any insert error after rolling the transaction back
def load_topics(conn, rows)
  highest_id = 0
  topics_loaded = 0

  conn.exec "BEGIN TRANSACTION"

  begin
    rows.each do |row|
      conn.exec <<~SQL, *row
        INSERT OR IGNORE INTO topics (id, category, title, created_at, user_id, tags)
        VALUES (?, ?, ?, ?, ?, ?)
      SQL
      topics_loaded += 1
      highest_id = row[0] if row[0] > highest_id
    end
  rescue StandardError
    # Do not leave the transaction open on the connection when a row fails.
    conn.exec "ROLLBACK TRANSACTION"
    raise
  end

  conn.exec "COMMIT TRANSACTION"

  { highest_id: highest_id, topics_loaded: topics_loaded }
end
|
||
# Inserts user rows into the users table inside a single transaction.
# Rows whose id already exists are ignored by the INSERT.
#
# @param conn [#exec] connection whose exec takes SQL followed by positional bind values
# @param rows [Array<Array>] rows ordered as id, username, name
# @return [Integer] number of rows processed
# @raise [StandardError] re-raises any insert error after rolling the transaction back
def load_users(conn, rows)
  conn.exec "BEGIN TRANSACTION"
  loaded = 0

  begin
    rows.each do |row|
      conn.exec <<~SQL, *row
        INSERT OR IGNORE INTO users(id, username, name)
        VALUES (?, ?, ?)
      SQL
      loaded += 1
    end
  rescue StandardError
    # Do not leave the transaction open on the connection when a row fails.
    conn.exec "ROLLBACK TRANSACTION"
    raise
  end

  conn.exec "COMMIT TRANSACTION"
  loaded
end
|
||
# Stores the users found under "relations" -> "user" of a query response and
# prints how many were handed to load_users. Does nothing when the response
# carries no such list.
#
# @param conn [#exec] connection passed through to load_users
# @param json [Hash] decoded query response
# @return [nil]
def load_users_from_json(conn, json)
  related_users = json.dig("relations", "user")
  return unless related_users

  user_rows = related_users.map { |entry| entry.values_at("id", "username", "name") }
  count = load_users(conn, user_rows)
  puts "Loaded #{count} users"
end
|
||
create_schema(conn)

# Page through the topic query: every request asks for ids above the highest
# one stored so far, and the loop ends on the first page that loads nothing.
topic_cursor = 0
loop do
  page = run_report(query_id: TOPIC_QUERY_ID, min_id: topic_cursor, limit: 10_000)

  load_users_from_json(conn, page)

  stats = load_topics(conn, page["rows"])
  puts "Loaded #{stats[:topics_loaded]} topics (highest id is #{stats[:highest_id]})"

  topic_cursor = stats[:highest_id]
  break if stats[:topics_loaded].zero?
end

# Same paging scheme for the post query.
post_cursor = 0
loop do
  page = run_report(query_id: POST_QUERY_ID, min_id: post_cursor, limit: 10_000)

  load_users_from_json(conn, page)

  stats = load_posts(conn, page["rows"])
  puts "Loaded #{stats[:posts_loaded]} posts (highest id is #{stats[:highest_id]})"

  post_cursor = stats[:highest_id]
  break if stats[:posts_loaded].zero?
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
require "sqlite3" | ||
require "mini_sql" | ||
|
||
# Imports the users stored in dump.db into a local Discourse install.
sqlite_conn = SQLite3::Database.new("dump.db")
conn = MiniSql::Connection.get(sqlite_conn)

# Discourse checkout to boot. Set DISCOURSE_PATH to point at your own copy;
# the default keeps the previously hard-coded location.
discourse_path = ENV.fetch("DISCOURSE_PATH", "/home/sam/Source/discourse")
Dir.chdir(discourse_path)
require File.join(discourse_path, "config/environment")

puts "Importing users..."

processed = 0
created = 0
conn
  .query("SELECT * FROM users")
  .each do |row|
    unless User.exists?(row.id)
      # Use the exported display name, falling back to the username when the
      # dump has none.
      display_name = row.name.to_s.empty? ? row.username : row.name

      # The dump holds no credentials or email addresses, so random
      # placeholders are generated for both.
      user =
        User.create(
          id: row.id,
          username: row.username,
          name: display_name,
          password: SecureRandom.hex,
          email: "#{SecureRandom.hex}@email.com"
        )

      if user.persisted?
        created += 1
      else
        # Report records that could not be saved instead of skipping them silently.
        warn "Could not create user #{row.id} (#{row.username}): #{user.errors.full_messages.join(", ")}"
      end
    end

    print "."
    processed += 1
    # Progress line every 500 rows; the number shown counts users actually created.
    puts "#{created} users created" if processed % 500 == 0
  end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
-- [params]
-- int :min_id = 0

-- Posts above the requested id, oldest id first. Only non-deleted, non-hidden
-- posts of post_type 1 are returned, and only from non-deleted topics whose
-- category is not read restricted.
SELECT
  post.id,
  post.raw,
  post.post_number,
  post.topic_id,
  post.user_id,
  post.created_at
FROM posts post
INNER JOIN topics topic
  ON topic.id = post.topic_id
INNER JOIN categories category
  ON category.id = topic.category_id
WHERE post.id > :min_id
  AND post.post_type = 1
  AND post.deleted_at IS NULL
  AND NOT post.hidden
  AND topic.deleted_at IS NULL
  AND NOT category.read_restricted
ORDER BY post.id ASC
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
-- [params]
-- int :min_id = 0

-- Topics above the requested id, oldest id first. Only non-deleted topics in
-- categories that are not read restricted are returned; the tag names of each
-- topic are joined into one comma separated string.
SELECT
  topic.id,
  category.name,
  topic.title,
  topic.created_at,
  topic.user_id,
  (
    SELECT STRING_AGG(tag.name, ', ')
    FROM topic_tags topic_tag
    INNER JOIN tags tag
      ON tag.id = topic_tag.tag_id
    WHERE topic_tag.topic_id = topic.id
  ) AS all_tags
FROM topics topic
INNER JOIN categories category
  ON category.id = topic.category_id
WHERE topic.id > :min_id
  AND topic.deleted_at IS NULL
  AND NOT category.read_restricted
ORDER BY topic.id ASC