延續上篇 https://n.sfs.tw/content/index/16452
parse html
class Base(DeclarativeBase):
    """Declarative base class that the ORM models in this script inherit from."""
class School(Base):
    """ORM model mapped to the ``school`` table.

    Each instance holds three string columns: ``name`` (the primary key),
    ``level`` and ``date``.
    """

    __tablename__ = "school"

    # The name doubles as the primary key, so it must be unique per row.
    name: Mapped[str] = mapped_column(primary_key=True)
    # Both remaining columns are stored as strings of at most 30 characters.
    level: Mapped[str] = mapped_column(String(30))
    date: Mapped[str] = mapped_column(String(30))

    def __init__(self, name, level, date):
        """Create a school from positional ``name``, ``level`` and ``date`` values."""
        self.name, self.level, self.date = name, level, date
# Substrings that mark an organisation name as a school; one match is enough.
SCHOOL_KEYWORDS = (
    "國小",
    "國中",
    "高中",
    "國民小學",
    "國民中學",
    "學校",
    "高級中學",
)

# Collected School objects, unique by name, in first-seen order.
schools = []
# Names already collected, kept in a set so the duplicate check is O(1).
seen_names = set()

# The pages are expected as 1.html .. 12.html in the working directory.
for page_number in range(1, 13):
    file = f"{page_number}.html"
    print(file)

    # The context manager closes each page file as soon as it has been read.
    with open(file, "r", encoding="utf-8") as html_file:
        soup = BeautifulSoup(html_file.read(), "html.parser")

    for item in soup.select("div.item"):
        # Only process items that carry a "機關名稱" title, so a name from an
        # earlier item can never be reused by mistake.
        titles = item.select("div.list-title")
        if not any(title.text == "機關名稱" for title in titles):
            continue

        # select_one returns None when the element is missing, in which case
        # the .text access below raises AttributeError (same as before).
        name = item.select_one("div.list-content").text.strip()
        level = item.select_one("span.item-level").text.strip()
        date = item.select_one("span.item-date").text.strip()

        if not any(keyword in name for keyword in SCHOOL_KEYWORDS):
            continue
        if name in seen_names:
            continue

        seen_names.add(name)
        schools.append(School(name, level, date))
利用 SQLAlchemy ORM 連接 SQLite 並寫入資料
# Write the parsed schools to the database.
engine = create_engine("sqlite:///school.db", echo=True)

# Create the "school" table when it does not exist yet; existing tables are
# left untouched, so this is safe to run every time.
Base.metadata.create_all(engine)

# The context manager closes the session even if a statement fails.
with Session(engine) as session:
    for school in schools:
        stmt = select(School).where(School.name == school.name)
        if session.scalars(stmt).first() is None:
            # Not stored yet: insert and commit this row immediately.
            session.add(school)
            session.commit()
        else:
            print("done." + school.name)

    # Read every row back from the database. The list is materialized while
    # the session is still open so the objects are fully loaded for later use.
    schools = list(session.scalars(select(School)))
輸出csv
# Export the school list as CSV: one summary row, then one numbered row per
# school. The with-statement closes the file, so no explicit close is needed.
with open('summary統計.csv', 'w', newline='', encoding="utf-8") as csvfile:
    # Create the CSV writer
    writer = csv.writer(csvfile)
    writer.writerow(["通過校數:" + str(len(schools))])
    # Row numbers start at 1.
    for i, school in enumerate(schools, start=1):
        writer.writerow([i, school.name, school.level, school.date])
輸出pdf
# Export the school list as a PDF: a centred title, a summary line, then one
# numbered row per school.
pdf = FPDF()
# Add a Unicode free font
pdf.add_font(fname='TW-Kai-98_1.ttf')
pdf.set_font('TW-Kai-98_1', size=16)
pdf.add_page()
pdf.cell(0, 20, 'summary統計', new_x="LMARGIN", new_y="NEXT", align='C')
pdf.set_font('TW-Kai-98_1', size=12)

# Column widths: serial number, name, level, date.
sn = 5
x1 = 100
x2 = 40
x3 = 40

# NOTE(review): `summary` was never assigned in the visible code, which made
# the header row below raise NameError. It is defined here with the same text
# as the CSV header row -- confirm this is the intended wording.
summary = "通過校數:" + str(len(schools))

# Header row: the cells share a bottom border, and the last cell moves the
# cursor to the start of the next line.
pdf.set_draw_color(0, 80, 180)
pdf.cell(sn, 10, " ", border="B")
pdf.cell(x1, 10, summary, border="B")
pdf.cell(x2, 10, "", border="B")
pdf.cell(x3, 10, "", border="B", new_x="LMARGIN", new_y="NEXT")

# One row per school, numbered from 1.
for i, school in enumerate(schools, start=1):
    pdf.cell(sn, 10, str(i))
    pdf.cell(x1, 10, school.name)
    pdf.cell(x2, 10, school.level)
    pdf.cell(x3, 10, school.date, new_x="LMARGIN", new_y="NEXT")

pdf.output('summary統計.pdf')
Cell width. If 0, the cell extends up to the right margin.
x1 = 100 表示這個 cell 的寬度是 100 個單位(FPDF 預設單位為 mm)。
同一列的下一個 cell 會接在它的右邊,寬度為 x2 = 40。
http://www.fpdf.org/en/doc/cell.htm
參考
https://py-pdf.github.io/fpdf2/Tutorial.html
