延續上篇 https://n.sfs.tw/content/index/16452
parse html
class Base(DeclarativeBase):
    """Declarative base class for the ORM models in this script."""


class School(Base):
    """One school row in the ``school`` table.

    Holds the organisation name (primary key) plus the level and date
    strings scraped from the HTML pages.
    """

    __tablename__ = "school"

    name: Mapped[str] = mapped_column(primary_key=True)
    level: Mapped[str] = mapped_column(String(30))
    date: Mapped[str] = mapped_column(String(30))

    def __init__(self, name, level, date):
        """Initialise the row from the three scraped text values."""
        self.name = name
        self.level = level
        self.date = date


# An organisation counts as a school when its name contains any of these.
_SCHOOL_KEYWORDS = (
    "國小",
    "國中",
    "高中",
    "國民小學",
    "國民中學",
    "學校",
    "高級中學",
)

schools = []
# Names already collected; gives O(1) de-duplication across all pages.
seen_names = set()

# Parse the saved pages 1.html .. 12.html.
for i in range(1, 13):
    file = str(i) + ".html"
    print(file)

    # Read the whole page; the context manager closes the handle right away.
    with open(file, "r", encoding="utf-8") as html_file:
        index = html_file.read()

    soup = BeautifulSoup(index, 'html.parser')

    for item in soup.select('div.item'):
        for title in item.select("div.list-title"):
            # Only the entry labelled "機關名稱" carries the organisation name.
            if title.text != "機關名稱":
                continue

            name = item.select_one("div.list-content").text.strip()
            level = item.select_one("span.item-level").text.strip()
            date = item.select_one("span.item-date").text.strip()

            if not any(keyword in name for keyword in _SCHOOL_KEYWORDS):
                continue
            if name in seen_names:
                continue

            seen_names.add(name)
            schools.append(School(name, level, date))
利用sqlalchemy orm 連接sqlite 寫入資料
# Write the parsed schools to the SQLite database.
engine = create_engine("sqlite:///school.db", echo=True)

# Create the "school" table if it does not exist yet; existing tables are
# left untouched, so this is safe to run on every execution.
Base.metadata.create_all(engine)

# The context manager closes the session (and releases its connection)
# even if an error occurs part-way through.
with Session(engine) as session:
    for school in schools:
        stmt = select(School).where(School.name == school.name)
        if session.scalars(stmt).first():
            # Already stored by a previous run.
            print("done." + school.name)
        else:
            session.add(school)
    # Commit all new rows in a single transaction.
    session.commit()

    # Read every row back from the database. This happens after the commit,
    # so each object's attributes are loaded before the session closes and
    # remain readable afterwards.
    schools = list(session.scalars(select(School)))
輸出csv
# Export the schools to CSV: one summary line, then one numbered row each.
with open('summary統計.csv', 'w', newline='', encoding="utf-8") as csvfile:
    # Create the CSV writer.
    writer = csv.writer(csvfile)
    writer.writerow(["通過校數:" + str(len(schools))])
    # Rows are numbered from 1; the `with` block closes the file on exit,
    # so no explicit close() call is needed.
    for row_number, school in enumerate(schools, start=1):
        writer.writerow([row_number, school.name, school.level, school.date])
輸出pdf
# Export the schools to a PDF table.
pdf = FPDF()

# Register a Unicode font file and use it for the title.
pdf.add_font(fname='TW-Kai-98_1.ttf')
pdf.set_font('TW-Kai-98_1', size=16)
pdf.add_page()

# Centered title spanning the full line (width 0), then move to the next line.
pdf.cell(0, 20, 'summary統計', new_x="LMARGIN", new_y="NEXT", align='C')
pdf.set_font('TW-Kai-98_1', size=12)

# Column widths: serial number, name, level, date.
sn = 5
x1 = 100
x2 = 40
x3 = 40

pdf.set_draw_color(0, 80, 180)

# Header row: every cell is underlined with a bottom border.
# NOTE(review): `summary` is not defined in the visible snippet — presumably
# it is set earlier in the full script; confirm before running.
for width, text in ((sn, " "), (x1, summary), (x2, "")):
    pdf.cell(width, 10, text, border="B")
pdf.cell(x3, 10, "", border="B", new_x="LMARGIN", new_y="NEXT")

# One row per school, numbered from 1; the last cell wraps to the next line.
for row_number, school in enumerate(schools, start=1):
    pdf.cell(sn, 10, str(row_number))
    pdf.cell(x1, 10, school.name)
    pdf.cell(x2, 10, school.level)
    pdf.cell(x3, 10, school.date, new_x="LMARGIN", new_y="NEXT")

pdf.output('summary統計.pdf')
Cell width. If 0, the cell extends up to the right margin.
x1 = 100 表示這個 cell 的寬度是 100 個單位(從目前位置往右延伸)。
下一個 cell 會從前一個 cell 的右緣開始,再往右延伸 x2 = 40 個單位。
http://www.fpdf.org/en/doc/cell.htm
參考
https://py-pdf.github.io/fpdf2/Tutorial.html