-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmoneyreader.py
27 lines (20 loc) · 882 Bytes
/
moneyreader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import os
import PyPDF2
import re
# Folder that is scanned (non-recursively) for invoice PDFs.
pdf_folder = 'pdf/'

# Matches a "Gesamtsumme"/"Zwischensumme" label followed (possibly lines later,
# hence re.DOTALL) by a German-formatted amount with a "€"/"EUR" suffix.
# The integer part is either dot-grouped thousands ("1.234") or a plain digit
# run ("1234"); the plain form used to be truncated to its last three digits.
# The (?<!\d) lookbehind keeps the lazy ".*?" from starting the capture in the
# middle of a number.
money_regex = (
    r'(?:Gesamt|Zwischen)summe.*?'
    r'((?<!\d)(?:\d{1,3}(?:\.\d{3})+|\d+)(?:,\d{2})?\s*(?:€|EUR))'
)
_MONEY_PATTERN = re.compile(money_regex, re.DOTALL)


def parse_money_amount(raw):
    """Convert a German-formatted amount such as '1.234,56 €' to a float.

    Args:
        raw: The amount text, optionally carrying a '€' or 'EUR' suffix.

    Returns:
        The amount as a float.

    Raises:
        ValueError: If the remaining text is not a valid number.
    """
    number = raw.replace('EUR', '').replace('€', '').strip()
    # '.' is the thousands separator and ',' the decimal mark.
    return float(number.replace('.', '').replace(',', '.'))


def find_money_amount(content):
    """Return the first labelled amount found in *content*, or None.

    Only the first "Gesamtsumme"/"Zwischensumme" label is considered, so
    whichever label appears first in the text decides the result.
    NOTE(review): confirm that preferring an earlier "Zwischensumme" over a
    later "Gesamtsumme" is intended.

    Args:
        content: Text to search.

    Returns:
        The amount as a float, or None when no labelled amount is present.
    """
    match = _MONEY_PATTERN.search(content)
    if match is None:
        return None
    return parse_money_amount(match.group(1))


def read_pdf_text(path):
    """Return the extracted text of every page of the PDF at *path*, joined."""
    with open(path, 'rb') as pdf:
        pdf_reader = PyPDF2.PdfReader(pdf)
        return ''.join(page.extract_text() for page in pdf_reader.pages)


def sum_money_amounts(folder=pdf_folder):
    """Add up the labelled amount of every '.pdf' file directly in *folder*.

    Files in which no labelled amount is found contribute nothing.

    Args:
        folder: Directory to scan; defaults to the module-level pdf_folder.

    Returns:
        The total, rounded to two decimal places.
    """
    pdf_files = [f for f in os.listdir(folder) if f.endswith('.pdf')]
    total_money_amount = 0
    for pdf_file in pdf_files:
        content = read_pdf_text(os.path.join(folder, pdf_file))
        money_amount = find_money_amount(content)
        if money_amount is not None:
            total_money_amount += money_amount
    return round(total_money_amount, 2)


def main():
    """Print the total of all amounts found in the PDF folder."""
    print("Total Amount:", sum_money_amounts())


if __name__ == "__main__":
    main()