diff --git a/README.md b/README.md index e9109feb..460a8200 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Surfer: Export your personal data in one click -# 🆕 **RECENT UPDATES (10/28/24): Surfer is evolving! We're currently working on a [new protocol](https://surferprotocol.org) to document personal data exporting. For the latest updates, visit our [project page](https://surferprotocol.org) or the [GitHub repo](https://github.com/surfer-org/protocol).** 🆕 +# 🆕 **RECENT UPDATES (10/28/24): Surfer is evolving! We're currently working on an [open-source framework](https://surferprotocol.org) to get and do stuff with your personal data. For the latest updates, visit our [project page](https://surferprotocol.org) or the [GitHub repo](https://github.com/surfer-org/protocol).** 🆕 [![Contributors][contributors-shield]][contributors-url] [![Forks][forks-shield]][forks-url] @@ -35,17 +35,14 @@ Currently, your personal data is scattered across hundreds of platforms and the We believe that personal data aggregation is the key to enabling truly useful, universal personal assistants. ## Currently Supported Platforms - -- Twitter Posts +- iMessages (Windows only) - Twitter Bookmarks -- LinkedIn Profile -- GitHub Repositories -- YouTube - Notion - ChatGPT History - Gmail -- iMessages (coming soon!) +- LinkedIn Connections (coming soon!) - Reddit (coming soon!) +- GitHub (coming soon!) 
## How it works @@ -88,10 +85,10 @@ See the [open issues](https://github.com/CEREBRUS-MAXIMUS/Surfer-Data/issues) fo ### Short-Term - [x] Data being maintained/updated everyday -- [ ] Scheduled exports +- [x] Scheduled exports - [ ] Obtain a code signing certificate for Windows - [x] Replace `setTimeout` with `await` for script execution to ensure elements exist before scraping -- [ ] Implement robust error handling for the scraping process +- [ ] Implement robust error handling for the scraping process - [ ] Add support for more online platforms - [x] Add verbosity to runs diff --git a/assets/icon.icns b/assets/icon.icns index 1c2e24e3..759a74a1 100644 Binary files a/assets/icon.icns and b/assets/icon.icns differ diff --git a/assets/icon.ico b/assets/icon.ico deleted file mode 100644 index d2206634..00000000 Binary files a/assets/icon.ico and /dev/null differ diff --git a/assets/icon.png b/assets/icon.png index 5997293c..b88529fd 100644 Binary files a/assets/icon.png and b/assets/icon.png differ diff --git a/assets/icon.svg b/assets/icon.svg index 3cc569b1..966818da 100644 --- a/assets/icon.svg +++ b/assets/icon.svg @@ -1,40 +1,12 @@ - - - - - - - - - - - - - + + + + + + + + + + + diff --git a/docs/ADD_PLATFORMS.md b/docs/ADD_PLATFORMS.md index 19e22fb3..383ae1ba 100644 --- a/docs/ADD_PLATFORMS.md +++ b/docs/ADD_PLATFORMS.md @@ -2,7 +2,7 @@ To add support for a new platform in Surfer, follow these steps: -1. **Create a new directory**: In the `Companies` folder, create a new directory named after the company (e.g., `Companies/Salesforce`). +1. **Create a new directory**: In the `platforms` folder, create a new directory named after the company (e.g., `platforms/Salesforce`). 2. **Create the platform file**: Inside the new directory, create a JavaScript file named after the platform (e.g., `slack.js`). 
@@ -12,6 +12,7 @@ To add support for a new platform in Surfer, follow these steps: - `connectURL`: The URL for the platform's login page - `connectSelector`: A CSS selector for an element that indicates a successful login - `isUpdated` (optional): A boolean indicating if the platform's data is regularly updated + - `exportFrequency` (optional): The frequency at which the platform's data is exported (e.g., "daily", "weekly", "monthly") Example JSON structure: ```json @@ -20,7 +21,8 @@ To add support for a new platform in Surfer, follow these steps: "description": "Exports [specific data types].", "connectURL": "https://platform.com/login", "connectSelector": "CSS_SELECTOR_FOR_LOGGED_IN_STATE", - "isUpdated": true + "isUpdated": true, + "exportFrequency": "daily" } ``` diff --git a/package-lock.json b/package-lock.json index 054c931b..7b4d91a4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -47,6 +47,7 @@ "lucide-react": "^0.396.0", "mbox-parser": "^1.0.1", "next-themes": "^0.3.0", + "node-schedule": "^2.1.1", "npm": "^10.8.2", "openai": "^4.58.2", "os-browserify": "^0.3.0", @@ -9015,6 +9016,18 @@ "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", "devOptional": true }, + "node_modules/cron-parser": { + "version": "4.9.0", + "resolved": "https://registry.npmjs.org/cron-parser/-/cron-parser-4.9.0.tgz", + "integrity": "sha512-p0SaNjrHOnQeR8/VnfGbmg9te2kfyYSQ7Sc/j/6DtPL3JQvKxmjO9TSjNFpujqV3vEYYBvNNvXSxzyksBWAx1Q==", + "license": "MIT", + "dependencies": { + "luxon": "^3.2.1" + }, + "engines": { + "node": ">=12.0.0" + } + }, "node_modules/cross-env": { "version": "7.0.3", "resolved": "https://registry.npmjs.org/cross-env/-/cross-env-7.0.3.tgz", @@ -15872,6 +15885,12 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/long-timeout": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/long-timeout/-/long-timeout-0.1.1.tgz", + "integrity": 
"sha512-BFRuQUqc7x2NWxfJBCyUrN8iYUYznzL9JROmRz1gZ6KlOIgmoD+njPVbb+VNn2nGMKggMsK79iUNErillsrx7w==", + "license": "MIT" + }, "node_modules/loose-envify": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", @@ -15918,6 +15937,15 @@ "react": "^16.5.1 || ^17.0.0 || ^18.0.0" } }, + "node_modules/luxon": { + "version": "3.5.0", + "resolved": "https://registry.npmjs.org/luxon/-/luxon-3.5.0.tgz", + "integrity": "sha512-rh+Zjr6DNfUYR3bPwJEnuwDdqMbxZW7LOQfUN4B54+Cl+0o5zaU9RJ6bcidfDtC1cWCZXQ+nvX8bf6bAji37QQ==", + "license": "MIT", + "engines": { + "node": ">=12" + } + }, "node_modules/lz-string": { "version": "1.5.0", "resolved": "https://registry.npmjs.org/lz-string/-/lz-string-1.5.0.tgz", @@ -16698,6 +16726,20 @@ "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.18.tgz", "integrity": "sha512-d9VeXT4SJ7ZeOqGX6R5EM022wpL+eWPooLI+5UpWn2jCT1aosUQEhQP214x33Wkwx3JQMvIm+tIoVOdodFS40g==" }, + "node_modules/node-schedule": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/node-schedule/-/node-schedule-2.1.1.tgz", + "integrity": "sha512-OXdegQq03OmXEjt2hZP33W2YPs/E5BcFQks46+G2gAxs4gHOIVD1u7EqlYLYSKsaIpyKCK9Gbk0ta1/gjRSMRQ==", + "license": "MIT", + "dependencies": { + "cron-parser": "^4.2.0", + "long-timeout": "0.1.1", + "sorted-array-functions": "^1.3.0" + }, + "engines": { + "node": ">=6" + } + }, "node_modules/nodemailer": { "version": "6.9.13", "resolved": "https://registry.npmjs.org/nodemailer/-/nodemailer-6.9.13.tgz", @@ -22512,6 +22554,12 @@ "node": ">=0.10.0" } }, + "node_modules/sorted-array-functions": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/sorted-array-functions/-/sorted-array-functions-1.3.0.tgz", + "integrity": "sha512-2sqgzeFlid6N4Z2fUQ1cvFmTOLRi/sEDzSQ0OKYchqgoPmQBVyM3959qYx3fpS6Esef80KjmpgPeEr028dP3OA==", + "license": "MIT" + }, "node_modules/source-map": { "version": "0.7.4", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.7.4.tgz", 
diff --git a/package.json b/package.json index 28c5a8ce..9c59d6c6 100644 --- a/package.json +++ b/package.json @@ -129,6 +129,7 @@ "lucide-react": "^0.396.0", "mbox-parser": "^1.0.1", "next-themes": "^0.3.0", + "node-schedule": "^2.1.1", "npm": "^10.8.2", "openai": "^4.58.2", "os-browserify": "^0.3.0", @@ -272,7 +273,7 @@ "notarize": { "teamId": "97SB7MA2WB" }, - "icon": "assets/mac-icon.icns" + "icon": "assets/icon.icns" }, "dmg": { "sign": false, diff --git a/src/main/main.ts b/src/main/main.ts index 245c0096..da0f5b50 100644 --- a/src/main/main.ts +++ b/src/main/main.ts @@ -23,6 +23,7 @@ const { download } = require('electron-dl'); import express from 'express'; import cors from 'cors'; import { getNotionCredentials, getTwitterCredentials } from './utils/network'; +import { scheduledJobs, scheduleNextExport, runInitialExports } from './utils/schedule'; // Preventing multiple instances of Surfer @@ -42,133 +43,156 @@ if (!gotTheLock) { }); } - - -const expressApp = express(); -expressApp.use(cors()); -expressApp.use(express.json()); - const port = 2024; -expressApp.get('/', (req, res) => { - // this would be the surferClient.connect() - res.send('Hello World'); -}); +// Add this function to check if server is running +const isServerRunning = async (): Promise => { + try { + const response = await fetch(`http://localhost:${port}/api/health`); + return response.status === 200; + } catch (error) { + return false; + } +}; -// Health check endpoint -expressApp.get('/api/health', (req, res) => { - res.json({ status: 'ok' }); -}); +// Replace the Express setup with this +const setupExpressServer = async () => { + const serverRunning = await isServerRunning(); + + if (serverRunning) { + console.log(`Server already running on port ${port}, skipping setup`); + return; + } -expressApp.post('/api/get', async (req, res) => { - console.log('GET REQUEST: ', req.body); - const { platformId } = req.body; + const expressApp = express(); + expressApp.use(cors()); + 
expressApp.use(express.json()); - mainWindow?.webContents.send('get-runs-request'); - const runsResponse: any = await new Promise((resolve) => { - ipcMain.once('get-runs-response', (event, runs) => resolve(runs)); + expressApp.get('/', (req, res) => { + // this would be the surferClient.connect() + res.send('Hello World'); }); - // Filter runs for this platform with successful status - const successfulRuns = runsResponse.filter( - (r: any) => r.platformId === platformId && r.status === 'success', - ); + // Health check endpoint + expressApp.get('/api/health', (req, res) => { + res.json({ status: 'ok' }); + }); - // Sort by startDate descending and get latest - const latestRun = successfulRuns.sort( - (a: any, b: any) => - new Date(b.startDate).getTime() - new Date(a.startDate).getTime(), - )[0]; + expressApp.post('/api/get', async (req, res) => { + console.log('GET REQUEST: ', req.body); + const { platformId } = req.body; - if (!latestRun) { - return res.status(404).json({ - success: false, - error: 'No successful runs found for this platform', + mainWindow?.webContents.send('get-runs'); + const runsResponse: any = await new Promise((resolve) => { + ipcMain.once('get-runs-response', (event, runs) => resolve(runs)); }); - } - console.log('latest run: ', latestRun.id); - const exportPath = fs.readdirSync(latestRun.exportPath); - const jsonFile = exportPath.find((file: any) => file.endsWith('.json')); + // Filter runs for this platform with successful status + const successfulRuns = runsResponse.filter( + (r: any) => r.platformId === platformId && r.status === 'success', + ); - if (!jsonFile) { - return res - .status(404) - .json({ success: false, error: 'No JSON file found in export path' }); - } + // Sort by startDate descending and get latest + const latestRun = successfulRuns.sort( + (a: any, b: any) => + new Date(b.endDate || b.startDate).getTime() - new Date(a.endDate || b.startDate).getTime(), + )[0]; - const filePath = path.join(latestRun.exportPath, 
jsonFile); - const fileData = JSON.parse(fs.readFileSync(filePath, 'utf8')); - res.json({ success: true, data: fileData }); -}); + if (!latestRun) { + return res.status(404).json({ + success: false, + error: 'No successful runs found for this platform', + }); + } -expressApp.post('/api/export', async (req, res) => { - console.log('Export request: ', req.body); - const { platformId } = req.body; + console.log('latest run: ', latestRun.id); + const exportPath = fs.readdirSync(latestRun.exportPath); + const jsonFile = exportPath.find((file: any) => file.endsWith('.json')); - try { - mainWindow?.webContents.send('element-found', platformId); + if (!jsonFile) { + return res + .status(404) + .json({ success: false, error: 'No JSON file found in export path' }); + } - // Get initial run with timeout - const currentRun: any = await new Promise((resolve) => { - ipcMain.once('run-started', (event, run) => resolve(run)); - }); + const filePath = path.join(latestRun.exportPath, jsonFile); + const fileData = JSON.parse(fs.readFileSync(filePath, 'utf8')); + res.json({ success: true, data: fileData }); + }); - console.log('Found current run:', currentRun.id); + expressApp.post('/api/export', async (req, res) => { + console.log('Export request: ', req.body); + const { platformId } = req.body; - // Monitor run status with timeout - const finalRun = await new Promise((resolve) => { - const checkRunStatus = async () => { - mainWindow?.webContents.send('get-runs-request'); - const runsResponse: any = await new Promise((resolve) => { - ipcMain.once('get-runs-response', (event, runs) => resolve(runs)); - }); + try { + mainWindow?.webContents.send('element-found', platformId); - const finalRun = runsResponse.find((r: any) => r.id === currentRun.id); - if (finalRun?.status === 'success') { - clearInterval(statusInterval); - resolve(finalRun); - } - }; + // Get initial run with timeout + const currentRun: any = await new Promise((resolve) => { + ipcMain.once('run-started', (event, run) => 
resolve(run)); + }); - const statusInterval = setInterval(checkRunStatus, 1000); - }); + console.log('Found current run:', currentRun.id); - console.log('final run status: ', finalRun.status); - // Process results - const latestRunPath = finalRun.exportPath; - if (!fs.existsSync(latestRunPath)) { - throw new Error('Export path not found'); - } + // Monitor run status with timeout + const finalRun = await new Promise((resolve) => { + const checkRunStatus = async () => { + mainWindow?.webContents.send('get-runs'); + const runsResponse: any = await new Promise((resolve) => { + ipcMain.once('get-runs-response', (event, runs) => resolve(runs)); + }); - const files = fs.readdirSync(latestRunPath); - const jsonFile = files.find((file) => file.endsWith('.json')); - if (!jsonFile) { - throw new Error('No JSON file found in export folder'); - } + const finalRun = runsResponse.find((r: any) => r.id === currentRun.id); + if (finalRun?.status === 'success') { + clearInterval(statusInterval); + resolve(finalRun); + } + }; - const filePath = path.join(latestRunPath, jsonFile); - const fileData = JSON.parse(fs.readFileSync(filePath, 'utf8')); + const statusInterval = setInterval(checkRunStatus, 1000); + }); - res.json({ - success: true, - data: fileData, - exportPath: path.dirname(filePath), - exportSize: getTotalFolderSize(path.dirname(filePath)), - }); - } catch (error) { - console.error('Export error:', error); - res.status(500).json({ - success: false, - error: error.message || 'Unknown export error', - }); - } -}); + console.log('final run status: ', finalRun.status); + // Process results + const latestRunPath = finalRun.exportPath; + if (!fs.existsSync(latestRunPath)) { + throw new Error('Export path not found'); + } -expressApp.listen(port, () => { - console.log(`Server is running on port ${port}`); -}); + const files = fs.readdirSync(latestRunPath); + const jsonFile = files.find((file) => file.endsWith('.json')); + if (!jsonFile) { + throw new Error('No JSON file found in 
export folder'); + } + + const filePath = path.join(latestRunPath, jsonFile); + const fileData = JSON.parse(fs.readFileSync(filePath, 'utf8')); + + res.json({ + success: true, + data: fileData, + exportPath: path.dirname(filePath), + exportSize: getTotalFolderSize(path.dirname(filePath)), + }); + } catch (error) { + console.error('Export error:', error); + res.status(500).json({ + success: false, + error: error.message || 'Unknown export error', + }); + } + }); + expressApp.listen(port, () => { + console.log(`Server is running on port ${port}`); + }).on('error', (err: any) => { + if (err.code === 'EADDRINUSE') { + console.log(`Port ${port} is busy, server not started`); + } else { + console.error('Server error:', err); + } + }); +}; autoUpdater.autoDownload = false; // Prevent auto-download autoUpdater.autoInstallOnAppQuit = false; @@ -324,6 +348,7 @@ const getPlatforms = async () => { needsConnection: metadata.needsConnection ?? true, connectURL: metadata.connectURL || null, connectSelector: metadata.connectSelector || null, + exportFrequency: metadata.exportFrequency || null }; }), ); @@ -377,7 +402,7 @@ class AppUpdater { } } -export let mainWindow: BrowserWindow | null = null; +let mainWindow: BrowserWindow | null = null; const isDebug = process.env.NODE_ENV === 'development' || process.env.DEBUG_PROD === 'true'; @@ -395,6 +420,7 @@ if (isDebug) { let isQuitting = false; + export const createWindow = async (visible: boolean = true) => { if (mainWindow) { return; @@ -427,6 +453,17 @@ export const createWindow = async (visible: boolean = true) => { mainWindow.loadURL(resolveHtmlPath('index.html')); + // Add this listener for renderer ready state + mainWindow.webContents.on('did-finish-load', () => { + console.log('Renderer ready, initializing exports'); + initializeExports(); + }); + + // Add this to reset renderer ready state when window is closed + mainWindow.on('closed', () => { + mainWindow = null; + }); + mainWindow.webContents.setWindowOpenHandler((details) 
=> { shell.openExternal(details.url); // Open URL in user's browser. return { action: 'deny' }; // Prevent the app from opening the URL. @@ -601,9 +638,6 @@ export const createWindow = async (visible: boolean = true) => { console.log('Zip fully extracted to:', extractPath); if (url.includes('takeout-download.usercontent.google.com')) { - // Function to recursively find the MBOX file - - const mboxFilePath = findMboxFile(extractPath); if (mboxFilePath) { const jsonOutputPath = path.join(extractPath, `${platformId}.json`); @@ -689,6 +723,72 @@ export const createWindow = async (visible: boolean = true) => { }); }; +const initializeExports = async () => { + if (!mainWindow) { + console.log('Renderer not ready, waiting...'); + return; + } + + try { + const platforms = await getPlatforms(); + + // Get current runs + mainWindow.webContents.send('get-runs'); + const runsResponse: any = await new Promise((resolve) => { + ipcMain.once('get-runs-response', (event, runs) => resolve(runs)); + }); + + for (const platform of platforms) { + if (platform.exportFrequency) { + console.log(`Checking exports for ${platform.name}`); + await runInitialExports(platform, runsResponse); + scheduleNextExport(platform); + + } + } + } catch (error) { + console.error('Failed to initialize export scheduling:', error); + } +}; + +export const waitForExportCompletion = async (platformId: string): Promise => { + try { + // Trigger the export + mainWindow?.webContents.send('element-found', platformId); + + // Wait for run to start + const currentRun: any = await new Promise((resolve) => { + ipcMain.once('run-started', (event, run) => resolve(run)); + }); + + console.log('Export started, monitoring run:', currentRun.id); + + // Monitor run status until success + const finalRun = await new Promise((resolve) => { + const checkRunStatus = async () => { + mainWindow?.webContents.send('get-runs'); + const runsResponse: any = await new Promise((resolve) => { + ipcMain.once('get-runs-response', (event, 
runs) => resolve(runs)); + }); + + const finalRun = runsResponse.find((r: any) => r.id === currentRun.id); + if (finalRun?.status === 'success') { + clearInterval(statusInterval); + resolve(finalRun); + } + }; + + const statusInterval = setInterval(checkRunStatus, 1000); + }); + + console.log('Export completed successfully:', finalRun.id); + return finalRun; + } catch (error) { + console.error('Export failed:', error); + throw error; + } +}; + ipcMain.on('open-external', (event, url) => { shell.openExternal(url); }); @@ -1080,4 +1180,23 @@ ipcMain.on('open-platform-export-folder', (event, company, name) => { const exportFolderPath = path.join(app.getPath('userData'), 'surfer_data', company, name); console.log('exportFolderPath', exportFolderPath); shell.openPath(exportFolderPath); +}); + +app.on('before-quit', async () => { + scheduledJobs.forEach((job) => job.cancel()); + scheduledJobs.clear(); + + // Send stop-runs signal to renderer + mainWindow?.webContents.send('stop-runs'); + const stopRunsResponse: any = await new Promise((resolve) => { + ipcMain.once('runs-stopped', (event, runs) => resolve(runs)); + }); + + console.log('Runs stopped:', stopRunsResponse); +}); + +// Update the app.whenReady() to be async +app.whenReady().then(async () => { + await setupExpressServer(); + // ... rest of your whenReady code ... 
}); \ No newline at end of file diff --git a/src/main/platforms/Apple/imessage.json b/src/main/platforms/Apple/imessage.json index 288df0f0..b65cd198 100644 --- a/src/main/platforms/Apple/imessage.json +++ b/src/main/platforms/Apple/imessage.json @@ -1,6 +1,5 @@ { "name": "iMessage", "description": "Exports all your iMessages.", - "needsConnection": false, - "exportFrequency": "daily" + "needsConnection": false } \ No newline at end of file diff --git a/src/main/platforms/Notion/notion.js b/src/main/platforms/Notion/notion.js index 16dbc115..e4b175c8 100644 --- a/src/main/platforms/Notion/notion.js +++ b/src/main/platforms/Notion/notion.js @@ -50,7 +50,7 @@ async function exportNotion(id, platformId, filename, company, name) { exportType: 'markdown', timeZone: notionCredentials.timezone, collectionViewExportType: 'currentView', - flattenExportFiletree: true, + flattenExportFiletree: false, }, shouldExportComments: false } @@ -71,46 +71,58 @@ async function exportNotion(id, platformId, filename, company, name) { } const { taskId } = await response.json(); - customConsoleLog(id, `Export task enqueued with ID: ${taskId}`); + customConsoleLog(id, `Notion export started`); // Poll for export completion while (true) { - const taskResponse = await fetch(tasksUrl, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'cookie': notionCredentials.cookie - }, - body: JSON.stringify({ taskIds: [taskId] }) - }); - - if (!taskResponse.ok) { - throw new Error(`Failed to check task status: ${taskResponse.status}`); - } - - const taskData = await taskResponse.json(); - const taskResult = taskData.results?.[0]; + try { + const taskResponse = await fetch(tasksUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'cookie': notionCredentials.cookie + }, + body: JSON.stringify({ taskIds: [taskId] }) + }); - if (taskResult) { - const { state, status } = taskResult; - if (!state || !status) { - customConsoleLog(id, "This is taskData: ", 
taskData); - customConsoleLog(id, "This is state and status: ", state, status); + if (taskResponse.status === 429) { + customConsoleLog(id, 'Rate limit reached, waiting 5 seconds before retrying...'); + await wait(5); + continue; } - if (status && status.pagesExported) { - customConsoleLog(id, `Export progress: ${status.pagesExported || 0} pages exported`); + + if (!taskResponse.ok) { + throw new Error(`Failed to check task status: ${taskResponse.status}`); } - if (state === 'success' && status.type === 'complete') { - const exportUrl = status.exportURL; - customConsoleLog(id, 'Export completed successfully!'); - window.location.assign(exportUrl); - // You might want to download the file here or send the URL somewhere - return "DOWNLOADING"; + const taskData = await taskResponse.json(); + const taskResult = taskData.results?.[0]; + + if (taskResult) { + const { state, status } = taskResult; + // if (!state || !status) { + // customConsoleLog(id, "This is taskData: ", taskData); + // customConsoleLog(id, "This is state and status: ", state, status); + // } + if (status && status.pagesExported) { + customConsoleLog(id, `Export progress: ${status.pagesExported || 0} pages exported`); + } + + if (state === 'success' && status.type === 'complete') { + const exportUrl = status.exportURL; + customConsoleLog(id, 'Export completed successfully!'); + window.location.assign(exportUrl); + // You might want to download the file here or send the URL somewhere + return "DOWNLOADING"; + } } - } - await wait(2); + await wait(4); + } catch (error) { + customConsoleLog(id, `Error checking task status: ${error.message}`); + await wait(5); + continue; + } } } catch (error) { diff --git a/src/main/platforms/X Corp/bookmarks.json b/src/main/platforms/X Corp/bookmarks.json index 129e7a44..84fb75c1 100644 --- a/src/main/platforms/X Corp/bookmarks.json +++ b/src/main/platforms/X Corp/bookmarks.json @@ -5,5 +5,5 @@ "isUpdated": true, "connectURL": "https://twitter.com", "connectSelector": 
"img.css-9pa8cd", - "exportFrequency": "hourly" -} \ No newline at end of file + "exportFrequency": "daily" +} diff --git a/src/main/platforms/X Corp/feed.js b/src/main/platforms/X Corp/feed.js deleted file mode 100644 index 3619991e..00000000 --- a/src/main/platforms/X Corp/feed.js +++ /dev/null @@ -1,163 +0,0 @@ -const { - customConsoleLog, - wait, - waitForElement, -} = require('../../preloadFunctions'); -const { ipcRenderer } = require('electron'); -const fs = require('fs'); -const path = require('path'); - -async function checkIfPostExists(id, platformId, company, name, currentPost) { - const userData = await ipcRenderer.invoke('get-user-data-path'); - const filePath = path.join( - userData, - 'surfer_data', - company, - name, - platformId, - `${platformId}.json`, - ); - console.log(id, `Checking if file exists at ${filePath}`); - const fileExists = await fs.existsSync(filePath); - if (fileExists) { - console.log(id, `File exists, reading file`); - try { - const fileContent = fs.readFileSync(filePath, 'utf-8'); - if (fileContent.trim() === '') { - console.log(id, 'File is empty'); - return false; - } - const posts = JSON.parse(fileContent); - console.log(id, 'Posts: ', posts); - if (posts && posts.content && Array.isArray(posts.content)) { - for (const post of posts.content) { - if ( - post.timestamp === currentPost.timestamp && - post.text === currentPost.text - ) { - console.log(id, 'Post already exists, skipping'); - return true; - } - } - } else { - console.log(id, 'Invalid or empty posts structure'); - } - } catch (error) { - console.error(id, `Error reading or parsing file: ${error.message}`); - } - } - - return false; -} - -async function exportFeed(id, platformId, filename, company, name) { - if (!window.location.href.includes('x.com')) { - customConsoleLog(id, 'Navigating to Twitter'); - window.location.assign('https://x.com/'); - } - await wait(5); - - if (document.body.innerText.toLowerCase().includes('sign in to x')) { - customConsoleLog( - id, - 
'YOU NEED TO SIGN IN (click the eye in the top right)!', - ); - ipcRenderer.send('connect-website', id); - return 'CONNECT_WEBSITE'; - } - customConsoleLog(id, 'Starting feed collection'); - - const feedArray = []; - let noNewPostsCount = 0; - - while (feedArray.length < 100 && noNewPostsCount < 3) { - const posts = await waitForElement( - id, - 'div[data-testid="cellInnerDiv"]', - 'Feed posts', - true, - ); - customConsoleLog(id, `Found ${posts.length} posts on the page`); - - if (posts.length === 0) { - customConsoleLog(id, 'No posts found, waiting 2 seconds before retry'); - await wait(2); - noNewPostsCount++; - continue; - } - - customConsoleLog(id, 'Processing new posts'); - const initialSize = feedArray.length; - - for (const post of posts) { - if (feedArray.length >= 100) break; - - post.scrollIntoView({ - behavior: 'instant', - block: 'end', - }); - - if (post.querySelector('time')) { - const jsonPost = { - text: post.innerText.replace(/\n/g, ' '), - timestamp: post.querySelector('time').getAttribute('datetime'), - author: - post.querySelector('div[data-testid="User-Name"]')?.innerText || - 'Unknown', - }; - - if ( - !feedArray.some( - (p) => - p.timestamp === jsonPost.timestamp && p.text === jsonPost.text, - ) - ) { - const postExists = await checkIfPostExists( - id, - platformId, - company, - name, - jsonPost, - ); - - if (postExists) { - customConsoleLog(id, 'Post already exists, skipping'); - continue; - } else { - ipcRenderer.send( - 'handle-update', - company, - name, - platformId, - JSON.stringify(jsonPost), - id, - ); - feedArray.push(jsonPost); - } - } - } - } - - const newPostsAdded = feedArray.length - initialSize; - customConsoleLog( - id, - `Added ${newPostsAdded} new unique posts. 
Total: ${feedArray.length}`, - ); - - if (newPostsAdded === 0) { - customConsoleLog(id, 'NO NEW POSTS ADDED, TRYING AGAIN!'); - noNewPostsCount++; - } else { - noNewPostsCount = 0; - } - - customConsoleLog(id, 'Waiting 2 seconds before getting more posts'); - await wait(2); - } - - customConsoleLog(id, `Exporting ${feedArray.length} feed posts`); - ipcRenderer.send('handle-update-complete', id, platformId, company, name); - return 'HANDLE_UPDATE_COMPLETE'; -} - -module.exports = exportFeed; diff --git a/src/main/platforms/X Corp/feed.json b/src/main/platforms/X Corp/feed.json deleted file mode 100644 index 49a090ac..00000000 --- a/src/main/platforms/X Corp/feed.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "name": "Twitter Feed", - "description": "Exports 100 posts in your feed.", - "isUpdated": true, - "logoURL": "https://logo.clearbit.com/twitter.com", - "connectURL": "https://twitter.com", - "connectSelector": "img.css-9pa8cd" -} \ No newline at end of file diff --git a/src/main/platforms/X Corp/feed.md b/src/main/platforms/X Corp/feed.md deleted file mode 100644 index 999ffc4a..00000000 --- a/src/main/platforms/X Corp/feed.md +++ /dev/null @@ -1,22 +0,0 @@ -# Twitter Feed Scraper - -This scraper extracts the latest 100 posts from your Twitter feed. - -## Features - -- Automatically navigates to Twitter -- Checks for user authentication -- Scrolls through the feed to collect posts -- Extracts post text, timestamp, and author -- Saves data in JSON format - -## Usage - -1. Ensure you're logged into Twitter in the Electron browser -2. Run the scraper -3. Wait for the scraper to collect 100 posts or reach the end of available new posts -4. 
The collected data will be saved in the specified JSON file - -## Output Format - -The scraper saves the data in the following format: diff --git a/src/main/utils/schedule.ts b/src/main/utils/schedule.ts new file mode 100644 index 00000000..7f59a262 --- /dev/null +++ b/src/main/utils/schedule.ts @@ -0,0 +1,78 @@ +import schedule from 'node-schedule'; +import { isToday, parseISO, format } from 'date-fns'; +import { waitForExportCompletion } from '../main'; + +export const scheduledJobs = new Map(); + + +export const runInitialExports = async (platform: any, runs: any) => { + const now = parseISO(new Date().toISOString()); + const twentyFourHoursAgo = parseISO(new Date(now.getTime() - 24 * 60 * 60 * 1000).toISOString()); + const oneHourAgo = parseISO(new Date(now.getTime() - 60 * 60 * 1000).toISOString()); + + const todayRuns = runs.filter( + (run: any) => + run.platformId === platform.id && + run.status === 'success' && + parseISO(run.endDate || run.startDate) > twentyFourHoursAgo + ); + + const hourlyRuns = runs.filter( + (run: any) => + run.platformId === platform.id && + run.status === 'success' && + parseISO(run.endDate || run.startDate) > oneHourAgo + ); + + if (platform.exportFrequency === 'daily') { + if (todayRuns.length === 0) { + await waitForExportCompletion(platform.id); + } + else { + console.log(`Export already completed for ${platform.name}`) + } + } else if (platform.exportFrequency === 'hourly') { + if (hourlyRuns.length === 0) { + await waitForExportCompletion(platform.id); + } else { + console.log(`Export already completed for ${platform.name}`); + } + } +}; + +export const scheduleNextExport = (platform: any) => { + if (scheduledJobs.has(platform.id)) { + scheduledJobs.get(platform.id).cancel(); + } + + const now = parseISO(new Date().toISOString()); + let nextRun = parseISO(new Date().toISOString()); + + if (platform.exportFrequency === 'hourly') { + nextRun.setTime(now.getTime() + 60 * 1000 * 60); + console.log( + `Scheduled export for 
${platform.name} for ${format(nextRun, 'yyyy-MM-dd HH:mm:ss')}`, + ); + } else if (platform.exportFrequency === 'daily') { + nextRun.setDate(now.getDate() + 1); + nextRun.setHours(0, 0, 0, 0); + console.log( + `Scheduled export for ${platform.name} for ${format(nextRun, 'yyyy-MM-dd HH:mm:ss')}`, + ); + } + + const job = schedule.scheduleJob(nextRun, async () => { + try { + await waitForExportCompletion(platform.id); + scheduleNextExport(platform); + console.log(`Export scheduled for ${platform.name}`) + } catch (error) { + console.error(`Scheduled export failed for ${platform.name}:`, error); + // Still schedule next run even if this one failed + scheduleNextExport(platform); + } + }); + + scheduledJobs.set(platform.id, job); +}; + diff --git a/src/renderer/Surfer.tsx b/src/renderer/Surfer.tsx index 071c749f..4e512bbf 100644 --- a/src/renderer/Surfer.tsx +++ b/src/renderer/Surfer.tsx @@ -3,9 +3,8 @@ import { useSelector, useDispatch } from 'react-redux'; import { IAppState } from './types/interfaces'; import Layout from './components/Layout'; import Home from './pages/Home'; -import Landing from './pages/Landing'; import Platform from './pages/Platform'; -import { setContentScale, setCurrentRoute, updateBreadcrumb, stopAllJobs, updateRunConnected } from './state/actions'; +import { setContentScale, setCurrentRoute, updateBreadcrumb, updateRunConnected } from './state/actions'; import { Alert, AlertTitle, AlertDescription } from './components/ui/alert'; import { Toaster } from './components/ui/toaster'; import { Progress } from './components/ui/progress'; diff --git a/src/renderer/components/PlatformDashboard.jsx b/src/renderer/components/PlatformDashboard.jsx index b72907cd..19fe53f2 100644 --- a/src/renderer/components/PlatformDashboard.jsx +++ b/src/renderer/components/PlatformDashboard.jsx @@ -43,15 +43,35 @@ useEffect(() => { window.electron.ipcRenderer.send('get-runs-response', runs); }; - window.electron.ipcRenderer.on('get-runs-request', 
handleGetRunsRequest); + window.electron.ipcRenderer.on('get-runs', handleGetRunsRequest); // Cleanup listener return () => { - window.electron.ipcRenderer.removeAllListeners('get-runs-request', handleGetRunsRequest); + window.electron.ipcRenderer.removeAllListeners('get-runs', handleGetRunsRequest); }; }, [runs]); +useEffect(() => { + window.electron.ipcRenderer.on('stop-runs', () => { + // Stop all pending or running runs + const activeRuns = runs.filter(run => + run && (run.status === 'pending' || run.status === 'running') + ); + + // Stop each run + activeRuns.forEach(run => { + dispatch(stopRun(run.id)); + }); + + // Notify main process that runs have been stopped + window.electron.ipcRenderer.sendMessage('runs-stopped'); + }); + // Cleanup listener + return () => { + window.electron.ipcRenderer.removeAllListeners('stop-runs'); + }; +}, [runs]); // Add runs as dependency const getLatestRun = (platformId) => { const platformRuns = runs.filter(run => run.platformId === platformId); @@ -69,7 +89,6 @@ useEffect(() => { const loadPlatforms = async () => { try { const platforms = await window.electron.ipcRenderer.invoke('get-platforms'); - console.log('PLATFORMS: ', platforms); setAllPlatforms(platforms); } catch (error) { @@ -92,7 +111,6 @@ useEffect(() => { useEffect(() => { const checkConnectedPlatforms = async () => { const connected = await window.electron.ipcRenderer.invoke('check-connected-platforms', allPlatforms); - console.log('CONNECTED PLATFORMS: ', connected); setConnectedPlatforms(connected); }; @@ -116,31 +134,31 @@ useEffect(() => { }; }, [allPlatforms]); - // useEffect(() => { - // const runisUpdateds = async () => { - // if (runs.length === 0) return; - - // for (const platform of filteredPlatforms) { - // if (platform.isUpdated) { - // const platformRuns = runs.filter(run => run.platformId === platform.id); - // if (platformRuns.length > 0) { - // const today = new Date().toISOString().split('T')[0]; - // const runsForToday = 
platformRuns.filter(run => - // (run.status === 'success' || run.status === 'running') && - // run.startDate.split('T')[0] === today - // ); - // console.log('runsForToday: ', runsForToday); - // if (runsForToday.length === 0) { - // await handleExportClick(platform); - // await new Promise(resolve => setTimeout(resolve, 5000)); - // } - // } - // } - // } - // }; - - // runisUpdateds(); - // }, [filteredPlatforms]); + useEffect(() => { + const runisUpdateds = async () => { + if (runs.length === 0) return; + + for (const platform of filteredPlatforms) { + if (platform.isUpdated) { + const platformRuns = runs.filter(run => run.platformId === platform.id); + if (platformRuns.length > 0) { + const today = new Date().toISOString().split('T')[0]; + const runsForToday = platformRuns.filter(run => + (run.status === 'success' || run.status === 'running') && + run.startDate.split('T')[0] === today + ); + console.log('runsForToday: ', runsForToday); + if (runsForToday.length === 0) { + await handleExportClick(platform); + await new Promise(resolve => setTimeout(resolve, 5000)); + } + } + } + } + }; + + runisUpdateds(); + }, [allPlatforms]); const pageCount = Math.ceil(filteredPlatforms.length / itemsPerPage); @@ -242,7 +260,7 @@ const renderRunStatus = (platform) => { return
; } - const logLines = latestRun.logs.split('\n'); + const logLines = latestRun && latestRun.logs ? latestRun.logs.split('\n') : []; switch (latestRun.status) { case 'running': diff --git a/src/renderer/components/RunDetails.jsx b/src/renderer/components/RunDetails.jsx index 02277567..c1fcbc6f 100644 --- a/src/renderer/components/RunDetails.jsx +++ b/src/renderer/components/RunDetails.jsx @@ -49,7 +49,6 @@ const RunDetails = ({ runId, onClose, platform }) => { const handleFiles = (files) => { console.log('Files:', files); setFiles(files || []); - console.log('Files:', files); }; window.electron.ipcRenderer.on('run-files', handleFiles); @@ -121,7 +120,7 @@ const RunDetails = ({ runId, onClose, platform }) => { - {run.status === 'success' && files.length > 0 && ( + {run.status === 'success' && files && files.length > 0 && (
- {run.logs.length > 0 ? ( + {run && run.logs && run.logs.length > 0 ? (
{run.logs.split('\n').map((log, index) => (