python 解析資料, 輸入並輸出 database, csv, pdf(二)

URL Link https://n.sfs.tw/16454

2024-09-24 14:24:22 By igogo

 

 

延續上篇 https://n.sfs.tw/content/index/16452

 

parse html

class Base(DeclarativeBase):
    """Declarative base class that the ORM models in this file inherit from."""

class School(Base):
    """ORM model mapped to the ``school`` table.

    ``name`` is the primary key; ``level`` and ``date`` are string columns
    declared as ``String(30)``.
    """

    __tablename__ = "school"

    name: Mapped[str] = mapped_column(primary_key=True)
    level: Mapped[str] = mapped_column(String(30))
    date: Mapped[str] = mapped_column(String(30))

    def __init__(self, name, level, date):
        """Build a record from positional ``name``, ``level`` and ``date``."""
        self.name, self.level, self.date = name, level, date


# Substrings that mark an agency name as belonging to a school.
SCHOOL_KEYWORDS = (
    "國小", "國中", "高中", "國民小學", "國民中學", "學校", "高級中學",
)

# Unique School records collected from the saved pages, in first-seen order.
schools = []
# Names already collected, so the duplicate check is a set lookup instead of
# a scan over the whole list.
seen_names = set()

# The saved pages are named 1.html through 12.html.
for page_no in range(1, 13):
    file = str(page_no) + ".html"
    print(file)
    # The with-block closes the page file as soon as its content is parsed.
    with open(file, "r", encoding="utf-8") as html_file:
        soup = BeautifulSoup(html_file.read(), 'html.parser')

    for item in soup.select('div.item'):
        # Resolve the name element afresh for every item so that an item
        # without a "機關名稱" title can never reuse the previous item's value.
        has_name_title = any(
            title.text == "機關名稱"
            for title in item.select("div.list-title")
        )
        name_tag = item.select_one("div.list-content") if has_name_title else None
        if name_tag is None:
            continue

        name = name_tag.text.strip()
        level = item.select_one("span.item-level").text.strip()
        date = item.select_one("span.item-date").text.strip()

        # Keep only school-like names, and each name only once.
        if name in seen_names:
            continue
        if not any(keyword in name for keyword in SCHOOL_KEYWORDS):
            continue
        seen_names.add(name)
        schools.append(School(name, level, date))

 

利用 SQLAlchemy ORM 連接 SQLite 並寫入資料


# Write the parsed schools to the SQLite database.
engine = create_engine("sqlite:///school.db", echo=True)
# Create the "school" table if it does not exist yet; tables that already
# exist are left untouched, so this is safe to run on every execution.
Base.metadata.create_all(engine)

# The with-block closes the session (and releases its connection) on exit.
with Session(engine) as session:
    for school in schools:
        # Insert only the schools whose primary key is not stored yet.
        stmt = select(School).where(School.name == school.name)
        if session.scalars(stmt).first() is None:
            session.add(school)
        else:
            print("done." + school.name)
    # One commit for the whole batch instead of one transaction per row.
    session.commit()

    # Read every row back from the database. The rows are loaded while the
    # session is still open, so their column values stay readable afterwards.
    schools = list(session.scalars(select(School)))

 

輸出csv

 

# Export the schools to CSV: one header row with the total count, then one
# numbered row per school. The with-block closes the file on exit, so no
# explicit close() call is needed.
with open('summary統計.csv', 'w', newline='', encoding="utf-8") as csvfile:
    # Create the CSV writer.
    writer = csv.writer(csvfile)
    writer.writerow(["通過校數:" + str(len(schools))])
    # Row numbers start at 1.
    writer.writerows(
        [row_no, school.name, school.level, school.date]
        for row_no, school in enumerate(schools, start=1)
    )

 

輸出pdf

 

# Export the schools to a PDF: a centred title, a summary line with a bottom
# border, then one numbered row per school.
pdf = FPDF()
# Add a Unicode free font
pdf.add_font(fname='TW-Kai-98_1.ttf')
pdf.set_font('TW-Kai-98_1', size=16)

pdf.add_page()
# Width 0 stretches the title cell to the right margin so it can be centred.
pdf.cell(0, 20, 'summary統計', new_x="LMARGIN", new_y="NEXT", align='C')

pdf.set_font('TW-Kai-98_1', size=12)

# Column widths: serial number, school name, level, date.
sn = 5
x1 = 100
x2 = 40
x3 = 40
# Summary text for the first row; same wording as the CSV header row.
# NOTE(review): `summary` was not defined anywhere in the visible script —
# confirm this is the intended text.
summary = "通過校數:" + str(len(schools))

pdf.set_draw_color(0, 80, 180)
pdf.cell(sn, 10, " ", border="B")
pdf.cell(x1, 10, summary, border="B")
pdf.cell(x2, 10, "", border="B")
# The last cell of a row moves the cursor to the left margin of the next line.
pdf.cell(x3, 10, "", border="B", new_x="LMARGIN", new_y="NEXT")

# Row numbers start at 1.
for i, school in enumerate(schools, start=1):
    pdf.cell(sn, 10, str(i))
    pdf.cell(x1, 10, school.name)
    pdf.cell(x2, 10, school.level)
    pdf.cell(x3, 10, school.date, new_x="LMARGIN", new_y="NEXT")
pdf.output('summary統計.pdf')

Cell width. If 0, the cell extends up to the right margin.

x1 = 100 表示這個 cell 的寬度為 100 個單位(fpdf2 預設單位為 mm)。

下一個 cell 會緊接在前一個 cell 的右側開始,因此寬度為 x2 的 cell 會從 x1 的右邊界起再佔 40 個單位。

http://www.fpdf.org/en/doc/cell.htm

 

參考

https://py-pdf.github.io/fpdf2/Tutorial.html