"""Export the HTML e-mails of an IMAP inbox to ``out/output.json``.

Connection settings are read from the environment (optionally via a ``.env``
file): ``IMAP_USER``, ``IMAP_PASSWORD``, ``IMAP_HOST`` and ``IMAP_PORT``.
For every ``text/html`` part found in the INBOX, the script stores the
received date plus a heuristically guessed topic and body.
"""
import email
import imaplib
import json
import os
from datetime import datetime
from email.utils import parsedate_to_datetime
from typing import Optional, Tuple

from bs4 import BeautifulSoup
from dotenv import load_dotenv


def _decode_html(part) -> str:
    """Return the decoded text of a ``text/html`` MIME part.

    Falls back to UTF-8 when the part declares no charset or one Python does
    not know, and replaces undecodable bytes instead of raising.
    """
    payload = part.get_payload(decode=True) or b""
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="replace")
    except LookupError:
        # The declared charset is not a codec Python recognises.
        return payload.decode("utf-8", errors="replace")


def _guess_topic_and_body(text: str) -> Tuple[str, str]:
    """Split *text* at the first blank line into ``(topic, body)``.

    The topic is everything before the first ``\\r\\n\\r\\n``, flattened to a
    single line and stripped of double quotes; the body is everything after.
    If there is no blank line, the whole text is the topic and the body is
    empty.
    """
    head, _, body = text.partition('\r\n\r\n')
    topic = head.replace('\r\n', ' ')
    topic = topic.replace('"', '')
    # NOTE(review): as written this replaces a space with a space (a no-op).
    # The original argument was presumably a double space or a non-breaking
    # space that got normalised somewhere -- confirm and restore.
    topic = topic.replace(' ', ' ')
    topic = topic.replace(' .', '.')
    return topic, body


def _iso_date(raw_date) -> Optional[str]:
    """Return the ``Date`` header value as an ISO 8601 string.

    Returns ``None`` when the header is missing or cannot be parsed, so one
    malformed message does not abort the whole export.
    """
    if not raw_date:
        return None
    try:
        # Handles the RFC 5322 variants a fixed strptime format rejects
        # (no weekday, trailing "(UTC)" comment, named zones, ...).
        return parsedate_to_datetime(str(raw_date)).isoformat()
    except (TypeError, ValueError):
        return None


load_dotenv()

IMAP_USER = os.getenv("IMAP_USER")
IMAP_PASSWORD = os.getenv("IMAP_PASSWORD")
IMAP_HOST = os.getenv("IMAP_HOST")
# Default to the standard IMAP-over-SSL port when IMAP_PORT is unset/empty.
IMAP_PORT = int(os.getenv("IMAP_PORT") or imaplib.IMAP4_SSL_PORT)

emails = []

with imaplib.IMAP4_SSL(host=IMAP_HOST, port=IMAP_PORT) as imap_ssl:
    resp_code, response = imap_ssl.login(IMAP_USER, IMAP_PASSWORD)
    print(f'Login: {resp_code}')

    # Read-only so that fetching does not mark messages as seen.
    resp_code, mail_count = imap_ssl.select(mailbox="INBOX", readonly=True)
    print(f'Select: {resp_code}, found {mail_count[0].decode()} messages in INBOX')

    resp_code, mail_ids = imap_ssl.search(None, "ALL")
    mail_id_list = mail_ids[0].decode().split()
    print(f'Search: {resp_code}, IDs: {mail_id_list}')

    for mail_id in mail_id_list:
        resp_code, mail_data = imap_ssl.fetch(mail_id, '(RFC822)')
        # A successful fetch yields a (header, raw_bytes) tuple first; anything
        # else means there is no message content to parse.
        if resp_code != 'OK' or not mail_data or not isinstance(mail_data[0], tuple):
            print(f'Fetch {mail_id}: {resp_code}, skipping')
            continue

        message = email.message_from_bytes(mail_data[0][1])
        iso_date = _iso_date(message.get("Date"))

        # One record per text/html part; messages without one yield nothing.
        for part in message.walk():
            if part.get_content_type() != "text/html":
                continue

            text = BeautifulSoup(_decode_html(part), 'html.parser').get_text()
            guessed_topic, guessed_body = _guess_topic_and_body(text)

            emails.append({
                "date": iso_date,
                "guessed_topic": guessed_topic,
                "guessed_body": guessed_body,
            })
            print(f'Parsed {mail_id}, received at {iso_date}, guessed topic: {guessed_topic}')

# Create the output directory on first run instead of failing in open().
os.makedirs("out", exist_ok=True)
with open("out/output.json", "w", encoding="utf-8") as file:
    json.dump(emails, file, ensure_ascii=False, indent=4)
print('Wrote to file :)')