first working version

Maciej L. 2025-03-02 15:21:31 +01:00
parent 4203b0baee
commit 6adbae4b57
Signed by: maciej
GPG key ID: 4B09FB92BF90A797
5 changed files with 76 additions and 0 deletions

.env.sample (Normal file, 4 additions)

@@ -0,0 +1,4 @@
IMAP_USER="user@example.com"
IMAP_PASSWORD="password"
IMAP_HOST="imap.example.com"
IMAP_PORT="993"

.gitignore (vendored, 3 additions)

@@ -172,3 +172,6 @@ cython_debug/
# PyPI configuration file
.pypirc
# main.py output file
out/

README.md (Normal file, 13 additions)

@@ -0,0 +1,13 @@
## zst-38-notatki
### What does this do?
This Python script downloads messages from an IMAP server, blindly parses them in an attempt to extract the messily formatted notes our teacher sends us, and saves them as JSON to be served by a web server.
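The result goes to `out/output.json` as a JSON array with one object per parsed message. A single entry looks roughly like this (field names match what `main.py` emits; the values below are made up):
```json
[
    {
        "date": "2025-03-02T15:21:31+01:00",
        "guessed_topic": "Example topic guessed from the first paragraph",
        "guessed_body": "The rest of the message, as plain text extracted from the HTML part."
    }
]
```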
### Setup
```bash
python3 -m venv venv
source venv/bin/activate
pip3 install -r requirements.txt
cp .env.sample .env
nano .env
python3 main.py
```
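One thing the steps above don't cover: `main.py` writes to `out/output.json` but doesn't create the directory itself, so if `out/` doesn't exist yet you'll likely need to create it first:
```bash
mkdir -p out
```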

main.py (54 additions)

@@ -0,0 +1,54 @@
import json
import os
import imaplib
import email
from datetime import datetime
from dotenv import load_dotenv
from bs4 import BeautifulSoup

load_dotenv()
IMAP_USER = os.getenv("IMAP_USER")
IMAP_PASSWORD = os.getenv("IMAP_PASSWORD")
IMAP_HOST = os.getenv("IMAP_HOST")
IMAP_PORT = int(os.getenv("IMAP_PORT"))

emails = []

with imaplib.IMAP4_SSL(host=IMAP_HOST, port=IMAP_PORT) as imap_ssl:
    resp_code, response = imap_ssl.login(IMAP_USER, IMAP_PASSWORD)
    print(f'Login: {resp_code}')

    resp_code, mail_count = imap_ssl.select(mailbox="INBOX", readonly=True)
    print(f'Select: {resp_code}, found {mail_count[0].decode()} messages in INBOX')

    resp_code, mail_ids = imap_ssl.search(None, "ALL")
    print(f'Search: {resp_code}, IDs: {mail_ids[0].decode().split()}')

    for mail_id in mail_ids[0].decode().split():
        resp_code, mail_data = imap_ssl.fetch(mail_id, '(RFC822)')
        message = email.message_from_bytes(mail_data[0][1])
        for part in message.walk():
            if part.get_content_type() == "text/html":
                content = part.get_payload(decode=True).decode(part.get_content_charset())
                soup = BeautifulSoup(content, 'html.parser')
                text = soup.get_text()

                partition = text.partition('\r\n\r\n')
                guessed_topic = partition[0]
                guessed_topic = guessed_topic.replace('\r\n', ' ')
                guessed_topic = guessed_topic.replace('"', '')
                guessed_topic = guessed_topic.replace('  ', ' ')
                guessed_topic = guessed_topic.replace(' .', '.')
                guessed_body = partition[2]

                recv_date = message.get("Date")
                parsed_date = datetime.strptime(recv_date, "%a, %d %b %Y %H:%M:%S %z")
                iso_date = parsed_date.isoformat()

                emails.append({"date": iso_date, "guessed_topic": guessed_topic, "guessed_body": guessed_body})
                print(f'Parsed {mail_id}, received at {iso_date}, guessed topic: {guessed_topic}')

with open("out/output.json", "w", encoding="utf-8") as file:
    json.dump(emails, file, ensure_ascii=False, indent=4)

print('Wrote to file :)')

requirements.txt (Normal file, 2 additions)

@@ -0,0 +1,2 @@
python-dotenv~=1.0.1
beautifulsoup4~=4.13.3