"""Download notes e-mailed to an IMAP inbox and dump them as JSON.

The script logs in to an IMAP server, walks every message in INBOX, takes
each ``text/html`` part, strips the markup and blindly guesses a "topic"
(everything before the first blank line) and a "body" (everything after).
The result is written to ``out/output.json`` so a web server can serve it.
The ``out/`` directory is git-ignored.

Setup::

    python3 -m venv venv
    source venv/bin/activate
    pip3 install -r requirements.txt   # python-dotenv, beautifulsoup4
    cp .env.sample .env
    nano .env
    python3 main.py

Configuration is read from the environment (optionally via ``.env``)::

    IMAP_USER="user@example.com"
    IMAP_PASSWORD="password"
    IMAP_HOST="imap.example.com"
    IMAP_PORT="993"        # optional, defaults to the IMAP-over-SSL port
"""

import email
import imaplib
import json
import os
import re
from email.utils import parsedate_to_datetime

# NOTE: the third-party imports (python-dotenv, beautifulsoup4) are done
# inside the functions that need them so that the pure parsing helpers in
# this module stay importable (and testable) with only the standard library.

OUTPUT_PATH = os.path.join("out", "output.json")

_SPACE_RUN = re.compile(r" {2,}")


def load_config(environ=None):
    """Return ``(host, port, user, password)`` read from *environ*.

    *environ* is any mapping of variable names to strings; it defaults to
    ``os.environ``.

    Raises:
        ValueError: if a required variable is missing or ``IMAP_PORT`` is
            not an integer.
    """
    env = os.environ if environ is None else environ
    required = ("IMAP_USER", "IMAP_PASSWORD", "IMAP_HOST")
    missing = [name for name in required if not env.get(name)]
    if missing:
        raise ValueError(
            "Missing required environment variable(s): "
            + ", ".join(missing)
            + " (see .env.sample)"
        )

    # An unset port used to crash with int(None); fall back to 993 instead.
    raw_port = env.get("IMAP_PORT") or imaplib.IMAP4_SSL_PORT
    try:
        port = int(raw_port)
    except ValueError as err:
        raise ValueError(f"IMAP_PORT must be an integer, got {raw_port!r}") from err
    return env["IMAP_HOST"], port, env["IMAP_USER"], env["IMAP_PASSWORD"]


def to_iso_date(raw_date):
    """Convert a ``Date`` header value to an ISO 8601 string.

    Returns ``None`` when the header is absent or cannot be parsed, so one
    odd message does not abort the whole run.
    """
    if raw_date is None:
        return None
    try:
        # Handles the RFC 5322 variants a fixed strptime() format rejects
        # (no weekday, trailing "(CET)" comment, ...).
        return parsedate_to_datetime(str(raw_date)).isoformat()
    except (TypeError, ValueError):
        return None


def decode_html_part(part):
    """Return the decoded text of a MIME *part*, or ``None`` if it has none.

    Falls back to UTF-8 when the part declares no charset or an unknown
    one, and replaces undecodable bytes rather than raising.
    """
    payload = part.get_payload(decode=True)
    if payload is None:
        return None
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="replace")
    except LookupError:
        # The message declared a charset Python does not know.
        return payload.decode("utf-8", errors="replace")


def html_to_text(html):
    """Strip the markup from *html* using BeautifulSoup."""
    from bs4 import BeautifulSoup

    return BeautifulSoup(html, 'html.parser').get_text()


def split_topic_and_body(text):
    """Split plain *text* into ``(guessed_topic, guessed_body)``.

    The topic is everything before the first blank line (CRLF CRLF); the
    body is everything after it. When there is no blank line the whole
    text is the topic and the body is empty.
    """
    head, _, body = text.partition('\r\n\r\n')
    topic = head.replace('\r\n', ' ').replace('"', '')
    # NOTE(review): the original replace() here swapped a single space for a
    # single space (a no-op as visible); collapsing runs of spaces is the
    # evident intent - confirm.
    topic = _SPACE_RUN.sub(' ', topic)
    topic = topic.replace(' .', '.')
    return topic, body


def parse_message(message, mail_id="?"):
    """Return one note dict per ``text/html`` part found in *message*.

    Each dict has the keys ``date``, ``guessed_topic`` and ``guessed_body``.
    *mail_id* is only used in the progress output.
    """
    iso_date = to_iso_date(message.get("Date"))
    notes = []
    for part in message.walk():
        if part.get_content_type() != "text/html":
            continue
        html = decode_html_part(part)
        if html is None:
            continue
        guessed_topic, guessed_body = split_topic_and_body(html_to_text(html))
        notes.append({
            "date": iso_date,
            "guessed_topic": guessed_topic,
            "guessed_body": guessed_body,
        })
        print(f'Parsed {mail_id}, received at {iso_date}, guessed topic: {guessed_topic}')
    return notes


def fetch_notes(host, port, user, password):
    """Log in over IMAP/SSL and parse every message in INBOX.

    Returns the list of note dicts produced by :func:`parse_message`.

    Raises:
        imaplib.IMAP4.error: on login or protocol failure.
        RuntimeError: if INBOX cannot be selected.
    """
    notes = []
    with imaplib.IMAP4_SSL(host=host, port=port) as imap_ssl:
        resp_code, _ = imap_ssl.login(user, password)
        print(f'Login: {resp_code}')

        # Read-only so that fetching does not mark messages as seen.
        resp_code, mail_count = imap_ssl.select(mailbox="INBOX", readonly=True)
        if resp_code != "OK":
            raise RuntimeError(f"Could not select INBOX: {resp_code} {mail_count!r}")
        print(f'Select: {resp_code}, found {mail_count[0].decode()} messages in INBOX')

        resp_code, mail_ids = imap_ssl.search(None, "ALL")
        # imaplib may hand back [None] when there is no untagged response.
        ids = (mail_ids[0] or b"").decode().split()
        print(f'Search: {resp_code}, IDs: {ids}')

        for mail_id in ids:
            resp_code, mail_data = imap_ssl.fetch(mail_id, '(RFC822)')
            if resp_code != "OK" or not mail_data or not isinstance(mail_data[0], tuple):
                print(f'Fetch of {mail_id} failed ({resp_code}), skipping')
                continue
            message = email.message_from_bytes(mail_data[0][1])
            notes.extend(parse_message(message, mail_id))
    return notes


def write_output(notes, path=OUTPUT_PATH):
    """Write *notes* as pretty-printed UTF-8 JSON to *path*.

    The parent directory is created when missing; ``out/`` is git-ignored,
    so it does not exist on a fresh clone.
    """
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(path, "w", encoding="utf-8") as file:
        json.dump(notes, file, ensure_ascii=False, indent=4)


def main():
    """Load the configuration, download the notes and write the JSON file."""
    from dotenv import load_dotenv

    load_dotenv()
    try:
        host, port, user, password = load_config()
    except ValueError as err:
        raise SystemExit(str(err)) from err

    emails = fetch_notes(host, port, user, password)
    write_output(emails)
    print('Wrote to file :)')


if __name__ == "__main__":
    main()