Skip to content

Commit

Permalink
added twitter feed export
Browse files Browse the repository at this point in the history
  • Loading branch information
t0mCS committed Sep 23, 2024
1 parent c322246 commit d17e33b
Show file tree
Hide file tree
Showing 4 changed files with 240 additions and 19 deletions.
169 changes: 169 additions & 0 deletions src/main/Scrapers/X Corp/feed.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
const {
customConsoleLog,
wait,
waitForElement,
bigStepper,
} = require('../../preloadFunctions');
const { ipcRenderer } = require('electron');
const fs = require('fs');
const path = require('path');

/**
 * Reports whether a post has already been saved for this platform.
 *
 * The saved posts are read from
 * `<userData>/surfer_data/<company>/<name>/<platformId>/<platformId>.json`,
 * where `<userData>` is obtained from the main process via the
 * 'get-user-data-path' IPC channel. A post counts as already saved when an
 * entry in the file's `content` array has the same `timestamp` and `text`.
 *
 * @param {*} id - Run identifier; only used as a prefix for log output.
 * @param {string} platformId - Platform id; names both the folder and the file.
 * @param {string} company - Company folder name.
 * @param {string} name - Scraper folder name.
 * @param {{timestamp: *, text: *}} currentPost - Post to look for.
 * @returns {Promise<boolean>} true only when a matching post is found. A
 *   missing, empty, unreadable or malformed file yields false so the caller
 *   treats the post as new.
 */
async function checkIfPostExists(id, platformId, company, name, currentPost) {
  const userData = await ipcRenderer.invoke('get-user-data-path');
  const filePath = path.join(
    userData,
    'surfer_data',
    company,
    name,
    platformId,
    `${platformId}.json`,
  );
  console.log(id, `Checking if file exists at ${filePath}`);

  // existsSync is synchronous and returns a plain boolean, so it is not awaited.
  if (!fs.existsSync(filePath)) {
    return false;
  }

  console.log(id, 'File exists, reading file');
  try {
    const fileContent = fs.readFileSync(filePath, 'utf-8');
    if (fileContent.trim() === '') {
      console.log(id, 'File is empty');
      return false;
    }

    const posts = JSON.parse(fileContent);
    if (!Array.isArray(posts?.content)) {
      console.log(id, 'Invalid or empty posts structure');
      return false;
    }

    // Log only the count: this function runs once per candidate post, and
    // dumping the whole history each time floods the console.
    console.log(id, `Loaded ${posts.content.length} saved posts`);

    const alreadySaved = posts.content.some(
      (post) =>
        post.timestamp === currentPost.timestamp &&
        post.text === currentPost.text,
    );
    if (alreadySaved) {
      console.log(id, 'Post already exists, skipping');
    }
    return alreadySaved;
  } catch (error) {
    // Best effort: a file that cannot be read or parsed must not abort the
    // export, so the post is reported as not yet saved.
    console.error(id, `Error reading or parsing file: ${error.message}`);
    return false;
  }
}

/**
 * Collects up to 100 posts from the X (Twitter) feed shown in the current
 * window and forwards each new one to the main process.
 *
 * Flow:
 *  1. If the window is not on x.com, navigate there, then pause.
 *  2. If the page text contains "sign in to x", ask the main process to
 *     connect the website and stop.
 *  3. Repeatedly read the feed cells on the page. Every cell that contains a
 *     `time` element and has not been seen in this run or saved on disk is
 *     sent on the 'handle-update' channel.
 *  4. Stop after 100 posts or after three consecutive passes that add nothing,
 *     then send 'handle-update-complete'.
 *
 * @param {*} id - Run identifier passed to the logging helpers and IPC messages.
 * @param {string} platformId - Platform id used for storage and IPC messages.
 * @param {string} filename - Unused here; kept so the signature stays unchanged.
 * @param {string} company - Company name used for storage and IPC messages.
 * @param {string} name - Scraper name used for storage and IPC messages.
 * @returns {Promise<string>} 'CONNECT_WEBSITE' when sign-in is required,
 *   otherwise 'HANDLE_UPDATE_COMPLETE'.
 */
async function exportFeed(id, platformId, filename, company, name) {
  const MAX_POSTS = 100;
  const MAX_EMPTY_PASSES = 3;

  if (!window.location.href.includes('x.com')) {
    bigStepper(id, 'Navigating to Twitter');
    customConsoleLog(id, 'Navigating to Twitter');
    window.location.assign('https://x.com/');
  }
  await wait(5);

  if (document.body.innerText.toLowerCase().includes('sign in to x')) {
    bigStepper(id, 'Export stopped, waiting for sign in');
    customConsoleLog(
      id,
      'YOU NEED TO SIGN IN (click the eye in the top right)!',
    );
    ipcRenderer.send('connect-website', id);
    return 'CONNECT_WEBSITE';
  }

  bigStepper(id, 'Getting feed posts...');
  customConsoleLog(id, 'Starting feed collection');

  const feedArray = [];
  let noNewPostsCount = 0;

  while (feedArray.length < MAX_POSTS && noNewPostsCount < MAX_EMPTY_PASSES) {
    const posts = await waitForElement(
      id,
      'div[data-testid="cellInnerDiv"]',
      'Feed posts',
      true,
    );

    // NOTE(review): waitForElement appears able to return a falsy value when
    // nothing matches (the profile scraper checks for that) - confirm. A
    // missing result is treated like an empty page so the retry counter
    // applies instead of a TypeError on `.length`.
    const foundCount = posts?.length ?? 0;
    customConsoleLog(id, `Found ${foundCount} posts on the page`);

    if (foundCount === 0) {
      customConsoleLog(id, 'No posts found, waiting 2 seconds before retry');
      await wait(2);
      noNewPostsCount++;
      continue;
    }

    customConsoleLog(id, 'Processing new posts');
    const initialSize = feedArray.length;

    for (const post of posts) {
      if (feedArray.length >= MAX_POSTS) break;

      post.scrollIntoView({
        behavior: 'instant',
        block: 'end',
      });

      // Cells without a <time> element are skipped.
      const timeElement = post.querySelector('time');
      if (!timeElement) continue;

      const jsonPost = {
        text: post.innerText.replace(/\n/g, ' '),
        timestamp: timeElement.getAttribute('datetime'),
        author:
          post.querySelector('div[data-testid="User-Name"]')?.innerText ||
          'Unknown',
      };

      // The same cell is usually still on the page in the next pass, so skip
      // anything already collected during this run.
      const seenThisRun = feedArray.some(
        (p) => p.timestamp === jsonPost.timestamp && p.text === jsonPost.text,
      );
      if (seenThisRun) continue;

      const postExists = await checkIfPostExists(
        id,
        platformId,
        company,
        name,
        jsonPost,
      );
      if (postExists) {
        customConsoleLog(id, 'Post already exists, skipping');
        continue;
      }

      ipcRenderer.send(
        'handle-update',
        company,
        name,
        platformId,
        JSON.stringify(jsonPost),
        id,
      );
      feedArray.push(jsonPost);
    }

    const newPostsAdded = feedArray.length - initialSize;
    customConsoleLog(
      id,
      `Added ${newPostsAdded} new unique posts. Total: ${feedArray.length}`,
    );

    if (newPostsAdded === 0) {
      customConsoleLog(id, 'NO NEW POSTS ADDED, TRYING AGAIN!');
      noNewPostsCount++;
    } else {
      noNewPostsCount = 0;
    }

    customConsoleLog(id, 'Waiting 2 seconds before getting more posts');
    await wait(2);
  }

  customConsoleLog(id, `Exporting ${feedArray.length} feed posts`);
  bigStepper(id, 'Exporting data');
  ipcRenderer.send('handle-update-complete', id, platformId, company, name);
  return 'HANDLE_UPDATE_COMPLETE';
}

// The feed exporter is this module's only export.
module.exports = exportFeed;
8 changes: 8 additions & 0 deletions src/main/Scrapers/X Corp/feed.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"name": "Twitter Feed",
"description": "Exports 100 posts in your feed.",
"isUpdated": true,
"logoURL": "https://logo.clearbit.com/twitter.com",
"connectURL": "https://twitter.com",
"connectSelector": "img.css-9pa8cd"
}
22 changes: 22 additions & 0 deletions src/main/Scrapers/X Corp/feed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Twitter Feed Scraper

This scraper extracts the latest 100 posts from your Twitter feed.

## Features

- Automatically navigates to Twitter
- Checks for user authentication
- Scrolls through the feed to collect posts
- Extracts post text, timestamp, and author
- Saves data in JSON format

## Usage

1. Ensure you're logged into Twitter in the Electron browser
2. Run the scraper
3. Wait for the scraper to collect 100 posts or reach the end of available new posts
4. The collected data will be saved in the specified JSON file

## Output Format

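The scraper saves each post as a JSON object with three fields: `text` (the post's text with line breaks replaced by spaces), `timestamp` (the post's `datetime` value), and `author` (the author's display text, or `Unknown` when it cannot be found).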
60 changes: 41 additions & 19 deletions src/main/Scrapers/X Corp/twitter.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
const { customConsoleLog, wait, waitForElement, bigStepper } = require('../../preloadFunctions');
const {
customConsoleLog,
wait,
waitForElement,
bigStepper,
} = require('../../preloadFunctions');
const { ipcRenderer } = require('electron');
const fs = require('fs');
const path = require('path');
Expand Down Expand Up @@ -55,7 +60,10 @@ async function exportTwitter(id, platformId, filename, company, name) {
await wait(5);
if (document.body.innerText.toLowerCase().includes('sign in to x')) {
bigStepper(id, 'Export stopped, waiting for sign in');
customConsoleLog(id, 'YOU NEED TO SIGN IN (click the eye in the top right)!');
customConsoleLog(
id,
'YOU NEED TO SIGN IN (click the eye in the top right)!',
);
ipcRenderer.send('connect-website', id);
return 'CONNECT_WEBSITE';
}
Expand All @@ -70,14 +78,17 @@ async function exportTwitter(id, platformId, filename, company, name) {
customConsoleLog(id, 'Got profile picture!');

if (!profilePics) {
customConsoleLog(id, 'YOU NEED TO SIGN IN (click the eye in the top right)!');
customConsoleLog(
id,
'YOU NEED TO SIGN IN (click the eye in the top right)!',
);
ipcRenderer.send('connect-website', id);
return 'CONNECT_WEBSITE';
}

bigStepper(id, 'Clicking on Profile Picture');
profilePics[1].click();
await wait(2);
await wait(2);

const tweetArray = [];
let noNewTweetsCount = 0;
Expand All @@ -100,11 +111,11 @@ async function exportTwitter(id, platformId, filename, company, name) {
await wait(2);
noNewTweetsCount++;
continue;
}
}

customConsoleLog(id, 'Processing new tweets');
const initialSize = tweetArray.length;

for (const tweet of tweets) {
tweet.scrollIntoView({
behavior: 'instant',
Expand All @@ -117,12 +128,29 @@ async function exportTwitter(id, platformId, filename, company, name) {
timestamp: tweet.querySelector('time').getAttribute('datetime'),
};

if (!tweetArray.some(t => t.timestamp === jsonTweet.timestamp && t.text === jsonTweet.text)) {
const tweetExists = await checkIfTweetExists(id, platformId, company, name, jsonTweet);

if (tweetExists) {
if (
!tweetArray.some(
(t) =>
t.timestamp === jsonTweet.timestamp && t.text === jsonTweet.text,
)
) {
const tweetExists = await checkIfTweetExists(
id,
platformId,
company,
name,
jsonTweet,
);

if (tweetExists) {
customConsoleLog(id, 'Tweet already exists, skipping');
ipcRenderer.send('handle-update-complete', id, platformId, company, name);
ipcRenderer.send(
'handle-update-complete',
id,
platformId,
company,
name,
);
return 'HANDLE_UPDATE_COMPLETE';
} else {
ipcRenderer.send(
Expand Down Expand Up @@ -158,14 +186,8 @@ async function exportTwitter(id, platformId, filename, company, name) {

customConsoleLog(id, `Exporting ${tweetArray.length} tweets`);
bigStepper(id, 'Exporting data');
ipcRenderer.send(
'handle-update-complete',
id,
platformId,
company,
name,
);
ipcRenderer.send('handle-update-complete', id, platformId, company, name);
return 'HANDLE_UPDATE_COMPLETE';
}

module.exports = exportTwitter;
module.exports = exportTwitter;

0 comments on commit d17e33b

Please sign in to comment.