-
Notifications
You must be signed in to change notification settings - Fork 239
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Jan Harrie
committed
Jul 2, 2017
1 parent
d00917e
commit 70c7ca5
Showing
13 changed files
with
463 additions
and
409 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,57 +1,22 @@ | ||
# Python Flathunter-Helper | ||
|
||
## Status | ||
- [X] Telegram Notifier | ||
- [X] Datastorage to store processed IDs | ||
- [X] Immobilienscout24 Crawler | ||
- [ ] WG-Gesucht Crawler | ||
- [X] Initial Crawler | ||
- [ ] Definition of Parameter | ||
- [ ] Optional enable/disable | ||
- [ ] Ebay Kleinanzeigen Crawler | ||
Original from Jan Harrie, adopted by Bene | ||
|
||
|
||
## Required | ||
- Webscraping for Python2.7 (http://bitbucket.org/richardpenman/webscraping) | ||
- Telegram for Python2.7 (https://github.com/liluo/telegram) | ||
## Requirements | ||
sudo -H pip3 install -r requirements.txt | ||
|
||
**Install:** | ||
|
||
pip2.7 install telegram | ||
pip2.7 install webscraping | ||
|
||
## Usage | ||
|
||
usage: main.py [-h] [-v] Bot-Token User-ID URL [URL ...] | ||
|
||
Crawls Immobilienscout24.de and sends results to Telegram User | ||
|
||
positional arguments: | ||
Bot-Token The secret token of the Telegram Bot | ||
User-ID ID of the Telegram User | ||
URL An URL to Immobilienscout24.de search result | ||
|
||
optional arguments: | ||
-h, --help show this help message and exit | ||
-v, --verbose Enable Verbose output | ||
|
||
Designed by Jan Harrie (c) [email protected] | ||
|
||
Example Output | ||
|
||
$ ./main.py -v 239_____________________CMtCKSPu7KIhg 1XXXXX090 https://www.immobilienscout24.de/Suche/S-T/Wohnung-Miete/Bayern/Muenchen/-/XXXXXXXXXXXXXXXXXg | ||
[2016/08/31 21:58:40|main.py |INFO ]: Start Immoscout Crawler | ||
[2016/08/31 21:58:40|main.py |INFO ]: Process URL no.#0 | ||
[2016/08/31 21:58:40|main.py |DEBUG ]: https://www.immobilienscout24.de/Suche/S-T/Wohnung-Miete/Bayern/Muenchen/-/XXXXXXXXXXXXXXXXXg | ||
[2016/08/31 21:58:50|main.py |INFO ]: 0 new offer found | ||
[2016/08/31 21:58:50|main.py |INFO ]: Process URL no.#1 | ||
[2016/08/31 21:58:50|main.py |DEBUG ]: wg-gesucht | ||
[2016/08/31 21:58:50|main.py |INFO ]: Stopped Immoscout Crawler | ||
[2016/08/31 21:58:50|main.py |DEBUG ]: Start WG-Gesucht Crawler | ||
[2016/08/31 21:59:04|main.py |INFO ]: 0 new offer found | ||
[2016/08/31 21:59:04|main.py |INFO ]: Stopped WG-Gesucht Crawler | ||
usage: flathunter.py [-h] [--config CONFIG] | ||
|
||
Searches for flats on Immobilienscout24.de and wg-gesucht.de and sends results | ||
to Telegram User | ||
|
||
And crawl like a Boss | ||
optional arguments: | ||
-h, --help show this help message and exit | ||
--config CONFIG, -c CONFIG | ||
Config file to use. If not set, ./config.yaml is used. | ||
|
||
$ while do ./main.py 239_____________________CMtCKSPu7KIhg 1XXXXX090 https://www.immobilienscout24.de/Suche/S-T/Wohnung-Miete/Bayern/Muenchen/-/XXXXXXXXXXXXXXXXXg ; echo "sleep 600"; sleep 600; done | ||
Designed by Jan Harrie (c) [email protected] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
# Should the bot endlessly loop through the URLs? | ||
# Between each loop it waits for <sleeping_time> seconds. | ||
loop: | ||
active: yes | ||
sleeping_time: 10 | ||
|
||
# List the URLs containing your filter properties below. | ||
# Currently supported services: www.immobilienscout24.de and | ||
# www.wg-gesucht.de. List the URLs in the following format: | ||
# urls: | ||
# - https://www.immobilienscout24.de/Suche/... | ||
# - https://www.wg-gesucht.de/... | ||
urls: | ||
|
||
# If an expose includes an address, the bot is capable of | ||
# displaying the distance and time to travel (duration) to | ||
# some configured other addresses, for specific kinds of | ||
# travel. | ||
# | ||
# Available kinds of travel ('gm_id') can be found in the | ||
# Google Maps API documentation, but basically there are: | ||
# - "bicycling" (Google's spelling; "bicycle" is not a valid mode) | ||
# - "transit" (public transport) | ||
# - "driving" | ||
# | ||
# The example configuration below includes a place for | ||
# "John", located at the main train station of Munich. | ||
# Two kinds of travel (bicycle and transit) are requested, | ||
# each with a different label. Furthermore a place for | ||
# "Jane" is included, located at the given destination and | ||
# with the same kinds of travel. | ||
durations: | ||
- name: John | ||
destination: Hauptbahnhof, München | ||
modes: | ||
- gm_id: transit | ||
title: "Öff." | ||
- gm_id: bicycle | ||
title: "Rad" | ||
- name: Jane | ||
destination: Karlsplatz, München | ||
modes: | ||
- gm_id: transit | ||
title: "Öff." | ||
- gm_id: driving | ||
title: "Auto" | ||
|
||
# Multiline message (yes, the | is supposed to be there), | ||
# to format the message received from the Telegram bot. | ||
# | ||
# Available placeholders: | ||
# - {title}: The title of the expose | ||
# - {rooms}: Number of rooms | ||
# - {size}: Size of the flat | ||
# - {price}: Price for the flat | ||
# - {durations}: Durations calculated by GMaps, see above | ||
# - {url}: URL to the expose | ||
message: | | ||
{title} | ||
Zimmer: {rooms} | ||
Größe: {size} | ||
Preis: {price} | ||
Anfahrt: | ||
{durations} | ||
|
||
{url} | ||
|
||
# Calculating durations requires access to the Google Maps API. | ||
# Below you can configure the URL to access the API, with placeholders. | ||
# The URL should most probably just be kept like that. | ||
# To use the Google Maps API, an API key is required. You can obtain one | ||
# without costs from the Google App Console (just google for it). | ||
google_maps_api: | ||
key: YOUR_API_KEY | ||
url: https://maps.googleapis.com/maps/api/distancematrix/json?origins={origin}&destinations={dest}&mode={mode}&sensor=true&key={key}&arrival_time={arrival} | ||
|
||
# Sending messages using Telegram requires a Telegram Bot configured. | ||
# Telegram.org offers a good documentation about how to create a bot. | ||
# Once you have read it, this will make sense. Still: bot_token should hold the | ||
# access token of your bot and receiver_ids should list the client ids | ||
# of receivers. Note that those receivers are required to already have | ||
# started a conversation with your bot. | ||
telegram: | ||
bot_token: | ||
receiver_ids: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8 -*- | ||
|
||
__author__ = "Jan Harrie" | ||
__version__ = "1.0" | ||
__maintainer__ = "Jan Harrie" | ||
__email__ = "[email protected]" | ||
__status__ = "Production" | ||
|
||
|
||
import argparse, os, logging, time, requests, yaml, re, urllib.parse, requests | ||
|
||
from flathunter.immosearch import ImmoSearcher | ||
from flathunter.wgsearch import WGSearcher | ||
from flathunter.idmaintainer import IdMaintainer | ||
import flathunter.util as util | ||
|
||
|
||
def hunt_flats(config, searchers, id_watch):
    """Crawl every configured URL and notify all receivers about new exposes.

    For each URL in ``config['urls']`` the first searcher whose
    ``URL_PATTERN`` matches is asked for results. Every expose whose id is
    not yet known is formatted with the ``message`` template from the config,
    sent to every configured Telegram receiver and then recorded through
    ``id_watch.add``.

    Args:
        config: Parsed configuration mapping (keys read here: ``telegram``,
            ``urls``, ``message``).
        searchers: Objects providing ``URL_PATTERN``, ``get_results(url)``
            and ``load_address(url)``.
        id_watch: Store of processed expose ids providing ``get()`` and
            ``add(id)``.
    """
    logger = logging.getLogger()
    telegram_cfg = config.get('telegram', dict())
    bot_token = telegram_cfg.get('bot_token', '')
    receiver_ids = telegram_cfg.get('receiver_ids', list())
    new_links = 0
    processed = id_watch.get()
    # 'processed' is a snapshot taken before crawling; track the ids reported
    # during this run as well, so that an expose listed under several URLs
    # is only sent once.
    reported = set()

    for url in config.get('urls', list()):
        logger.debug('Processing URL: ' + url)

        # Without this guard an unsupported URL either raised a NameError
        # (first URL) or re-used the results of the previous URL.
        searcher = _find_searcher(searchers, url)
        if searcher is None:
            logger.warning("No searcher available for URL %s. Skipping." % url)
            continue

        try:
            results = searcher.get_results(url)
        except requests.exceptions.ConnectionError:
            logger.warning("Connection to %s failed. Retrying. " % url.split('/')[2])
            continue

        # on error, stop execution
        # NOTE(review): an empty result list also ends the whole run here,
        # so later URLs are not crawled - confirm that this is intended.
        if not results:
            break

        for expose in results:
            # check if already processed
            if expose['id'] in processed or expose['id'] in reported:
                continue

            logger.info('New offer: ' + expose['title'])

            # to reduce traffic, some addresses need to be loaded on demand
            address = expose['address']
            if address.startswith('http'):
                address_url = address
                loader = _find_searcher(searchers, address_url)
                if loader is not None:
                    address = loader.load_address(address_url)
                    logger.debug("Loaded address %s for url %s" % (address, address_url))

            # calculate durations and build the message
            message = config.get('message', "").format(
                title=expose['title'],
                rooms=expose['rooms'],
                size=expose['size'],
                price=expose['price'],
                url=expose['url'],
                durations=get_formatted_durations(config, address)).strip()

            # send message to all receivers
            for receiver_id in receiver_ids:
                send_msg(bot_token, receiver_id, message)

            new_links = new_links + 1
            reported.add(expose['id'])
            id_watch.add(expose['id'])

    logger.info(str(new_links) + ' new offer found')


def _find_searcher(searchers, url):
    """Return the first searcher whose URL_PATTERN matches url, else None."""
    for searcher in searchers:
        if re.search(searcher.URL_PATTERN, url):
            return searcher
    return None
|
||
def send_msg(bot_token, chat_id, message):
    """Send a text message to one Telegram chat via the Bot API.

    Args:
        bot_token: Access token of the bot, with or without the leading
            ``bot`` prefix the API expects in the request path.
        chat_id: Id of the receiving chat (integer or string).
        message: Text to send; it is UTF-8 encoded and URL-quoted here.

    A non-200 response is logged as an error; nothing is returned.
    """
    logger = logging.getLogger()

    # The Bot API path is https://api.telegram.org/bot<token>/<method>;
    # accept tokens that already carry the prefix to stay compatible with
    # existing configs.
    token = str(bot_token)
    if not token.startswith('bot'):
        token = 'bot' + token

    url = 'https://api.telegram.org/%s/sendMessage?chat_id=%s&text=%s'
    text = urllib.parse.quote_plus(message.encode('utf-8'))
    # %s plus quoting instead of %i, so string ids work as well
    chat = urllib.parse.quote_plus(str(chat_id))
    qry = url % (token, chat, text)
    logger.debug("Retrieving URL %s" % qry)
    resp = requests.get(qry)
    logger.debug("Got response (%i): %s" % (resp.status_code, resp.content))

    # handle error
    if resp.status_code != 200:
        sc = resp.status_code
        try:
            data = resp.json()
        except ValueError:
            # error body is not JSON; report the raw payload instead of crashing
            data = resp.content
        logger.error("When sending bot message, we got status %i with message: %s" % (sc, data))
|
||
def get_formatted_durations(config, address):
    """Build the travel-duration lines for one address.

    Every entry of ``config['durations']`` that has both a ``destination``
    and a ``name`` is processed; for each of its ``modes`` carrying both
    ``gm_id`` and ``title`` one line ``> name (title): value`` is produced,
    where the value is whatever ``util.getDistance`` returns.

    Returns:
        All lines joined by newlines, stripped of surrounding whitespace
        (an empty string when nothing qualifies).
    """
    lines = []
    for entry in config.get('durations', list()):
        if not ('destination' in entry and 'name' in entry):
            continue
        target = entry.get('destination')
        label = entry.get('name')
        for mode in entry.get('modes', list()):
            if 'gm_id' not in mode or 'title' not in mode:
                continue
            travel = util.getDistance(config, address, target, mode['gm_id'])
            lines.append("> %s (%s): %s\n" % (label, mode['title'], travel))

    return "".join(lines).strip()
|
||
def launch_flat_hunt(config):
    """Run the flat hunt once and, if configured, keep repeating it.

    Creates the two searchers and the id store backed by
    ``./processed_ids.db``, performs one hunt and then - while
    ``config['loop']['active']`` is truthy - sleeps for
    ``config['loop']['sleeping_time']`` seconds (default 10) before each
    further hunt.

    Args:
        config: Parsed configuration mapping.
    """
    searchers = [ImmoSearcher(), WGSearcher()]
    id_watch = IdMaintainer('./processed_ids.db')

    hunt_flats(config, searchers, id_watch)
    while config.get('loop', dict()).get('active', False):
        # Sleep before hunting again: the previous order ran the first two
        # hunts back-to-back. The key was also misspelt ('sleepting_time'),
        # so the configured interval was never honoured.
        time.sleep(config.get('loop', dict()).get('sleeping_time', 10))
        hunt_flats(config, searchers, id_watch)
|
||
def main():
    """Command line entry point: set up logging, load the config, start hunting.

    Reads the YAML config given via ``--config``/``-c`` (default
    ``./config.yaml``), refuses to start when the Telegram bot token, the
    receivers or the URLs are missing, switches to DEBUG logging when
    ``verbose`` is set in the config and finally calls ``launch_flat_hunt``.
    """
    # init logging
    cyellow = '\033[93m'
    cblue = '\033[94m'
    coff = '\033[0m'
    logging.basicConfig(
        format='[' + cblue + '%(asctime)s' + coff + '|' + cblue + '%(filename)-18s' + coff + '|'
               + cyellow + '%(levelname)-8s' + coff + ']: %(message)s',
        datefmt='%Y/%m/%d %H:%M:%S', level=logging.INFO)
    logger = logging.getLogger()

    # parse args
    parser = argparse.ArgumentParser(
        description="Searches for flats on Immobilienscout24.de and "
                    "wg-gesucht.de and sends results to Telegram User",
        epilog="Designed by Jan Harrie (c) [email protected]")
    parser.add_argument('--config', '-c', type=argparse.FileType('r', encoding='UTF-8'),
                        default='./config.yaml',
                        help="Config file to use. If not set, ./config.yaml "
                             "is used. ")
    args = parser.parse_args()

    # load config; the handle opened by argparse is closed once it is read
    with args.config as config_handle:
        logger.info("Using config %s" % config_handle.name)
        # safe_load instead of yaml.load: the config only needs plain YAML
        # types, and yaml.load without an explicit Loader can construct
        # arbitrary objects (and is rejected by current PyYAML releases).
        config = yaml.safe_load(config_handle.read())

    # an empty file parses to None, anything but a mapping is unusable
    if config is None:
        config = dict()
    if not isinstance(config, dict):
        logger.error("Config file does not contain a mapping. Starting like this would be meaningless...")
        return

    # check config ('telegram:' without children parses to None)
    telegram_cfg = config.get('telegram') or dict()
    if not telegram_cfg.get('bot_token'):
        logger.error("No telegram bot token configured. Starting like this would be meaningless...")
        return
    if not telegram_cfg.get('receiver_ids'):
        logger.error("No telegram receivers configured. Starting like this would be meaningless...")
        return
    if not config.get('urls'):
        logger.error("No urls configured. Starting like this would be meaningless...")
        return

    # adjust log level, if required
    if config.get('verbose'):
        logger.setLevel(logging.DEBUG)
        from pprint import pformat
        logger.debug("Settings from config: %s" % pformat(config))

    # start hunting for flats
    launch_flat_hunt(config)


# Only start the hunt when executed as a script, not when imported.
if __name__ == "__main__":
    main()
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.