first working version
This commit is contained in:
parent
4203b0baee
commit
6adbae4b57
5 changed files with 76 additions and 0 deletions
4
.env.sample
Normal file
4
.env.sample
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
IMAP_USER="user@example.com"
|
||||||
|
IMAP_PASSWORD="password"
|
||||||
|
IMAP_HOST="imap.example.com"
|
||||||
|
IMAP_PORT="993"
|
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -172,3 +172,6 @@ cython_debug/
|
||||||
|
|
||||||
# PyPI configuration file
|
# PyPI configuration file
|
||||||
.pypirc
|
.pypirc
|
||||||
|
|
||||||
|
# main.py output file
|
||||||
|
out/
|
||||||
|
|
13
README.md
Normal file
13
README.md
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
## zst-38-notatki
|
||||||
|
### What does this do?
|
||||||
|
This Python script downloads messages from an IMAP server, naively parses them to extract the poorly formatted notes our teacher sends us, and then saves the result as JSON to be served by a web server.
|
||||||
|
|
||||||
|
### Setup
|
||||||
|
```bash
|
||||||
|
python3 -m venv venv
|
||||||
|
source venv/bin/activate
|
||||||
|
pip3 install -r requirements.txt
|
||||||
|
cp .env.sample .env
|
||||||
|
nano .env
|
||||||
|
python3 main.py
|
||||||
|
```
|
54
main.py
54
main.py
|
@ -0,0 +1,54 @@
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import imaplib
|
||||||
|
import email
|
||||||
|
from datetime import datetime
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# Destination of the generated JSON; the "out/" directory is git-ignored, so
# it may not exist on a fresh checkout and is created on demand.
OUTPUT_PATH = "out/output.json"


def split_topic_and_body(text):
    """Split message text into a ``(guessed_topic, guessed_body)`` pair.

    Everything before the first blank line (``\\r\\n\\r\\n``) is treated as the
    topic; everything after it is the body. The topic is flattened to a single
    line, double quotes are removed, runs of whitespace are collapsed to one
    space, and a space left in front of a full stop is dropped. If there is no
    blank line, the whole text becomes the topic and the body is empty.
    """
    head, _, body = text.partition('\r\n\r\n')
    topic = head.replace('\r\n', ' ').replace('"', '')
    # Collapse any run of whitespace (the original single-pass replace was a
    # no-op as written and could not handle longer runs anyway).
    topic = ' '.join(topic.split())
    topic = topic.replace(' .', '.')
    return topic, body


def parse_mail_date(raw_date):
    """Convert a ``Date`` header value to an ISO 8601 string.

    Returns ``None`` when the header is missing or cannot be parsed, so one
    malformed message does not abort the whole run.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    from email.utils import parsedate_to_datetime

    if not raw_date:
        return None
    try:
        # Unlike a fixed strptime format, this accepts headers without a
        # weekday and with a trailing comment such as "(UTC)".
        return parsedate_to_datetime(raw_date).isoformat()
    except (TypeError, ValueError):
        # Older Python versions raise TypeError, newer ones ValueError.
        return None


def decode_html_part(part):
    """Return the decoded text of a message part.

    Falls back to UTF-8 when the part declares no charset or an unknown one,
    and replaces undecodable bytes instead of raising.
    """
    payload = part.get_payload(decode=True)
    if payload is None:
        return ""
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="replace")
    except LookupError:
        # The declared charset is not known to Python.
        return payload.decode("utf-8", errors="replace")


def extract_notes(message):
    """Build one note dict per ``text/html`` part of *message*.

    Each dict has the keys ``date``, ``guessed_topic`` and ``guessed_body``.
    """
    iso_date = parse_mail_date(message.get("Date"))
    notes = []
    for part in message.walk():
        if part.get_content_type() != "text/html":
            continue
        soup = BeautifulSoup(decode_html_part(part), 'html.parser')
        guessed_topic, guessed_body = split_topic_and_body(soup.get_text())
        notes.append({"date": iso_date, "guessed_topic": guessed_topic, "guessed_body": guessed_body})
    return notes


def main():
    """Download every INBOX message and write the parsed notes to OUTPUT_PATH."""
    load_dotenv()

    imap_user = os.getenv("IMAP_USER")
    imap_password = os.getenv("IMAP_PASSWORD")
    imap_host = os.getenv("IMAP_HOST")
    # Fall back to the standard IMAPS port when IMAP_PORT is unset or empty.
    imap_port = int(os.getenv("IMAP_PORT") or imaplib.IMAP4_SSL_PORT)

    required = {"IMAP_USER": imap_user, "IMAP_PASSWORD": imap_password, "IMAP_HOST": imap_host}
    missing = [name for name, value in required.items() if not value]
    if missing:
        raise SystemExit(f'Missing required settings: {", ".join(missing)}')

    emails = []

    with imaplib.IMAP4_SSL(host=imap_host, port=imap_port) as imap_ssl:
        resp_code, _ = imap_ssl.login(imap_user, imap_password)
        print(f'Login: {resp_code}')
        # Read-only so that fetching does not mark messages as seen.
        resp_code, mail_count = imap_ssl.select(mailbox="INBOX", readonly=True)
        print(f'Select: {resp_code}, found {mail_count[0].decode()} messages in INBOX')
        resp_code, mail_ids = imap_ssl.search(None, "ALL")
        id_list = (mail_ids[0] or b"").decode().split()
        print(f'Search: {resp_code}, IDs: {id_list}')

        for mail_id in id_list:
            resp_code, mail_data = imap_ssl.fetch(mail_id, '(RFC822)')
            # A successful fetch yields a (header, raw_message) tuple first.
            if resp_code != 'OK' or not mail_data or not isinstance(mail_data[0], tuple):
                print(f'Skipping {mail_id}: fetch returned {resp_code}')
                continue
            message = email.message_from_bytes(mail_data[0][1])

            for note in extract_notes(message):
                emails.append(note)
                print(f'Parsed {mail_id}, received at {note["date"]}, guessed topic: {note["guessed_topic"]}')

    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
    with open(OUTPUT_PATH, "w", encoding="utf-8") as file:
        json.dump(emails, file, ensure_ascii=False, indent=4)

    print('Wrote to file :)')


if __name__ == "__main__":
    main()
|
2
requirements.txt
Normal file
2
requirements.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
python-dotenv~=1.0.1
|
||||||
|
beautifulsoup4~=4.13.3
|
Loading…
Add table
Reference in a new issue