python導(dǎo)入數(shù)據(jù)工具

前段時(shí)間為了ICP證書,寫了一個(gè)以node.js為后臺(tái),vue為前臺(tái)頁(yè)面的小項(xiàng)目。眾所周知在小的項(xiàng)目它也要數(shù)據(jù)來(lái)支撐,這不是被逼著搗鼓如何快速的導(dǎo)入數(shù)據(jù)到數(shù)據(jù)庫(kù),我這種方法不一定是最好的,但是在現(xiàn)階段我覺的是最有意思的。

項(xiàng)目比較簡(jiǎn)單,大佬勿噴,第一次用python寫東西多多包涵!有錯(cuò)誤的地方希望大神指點(diǎn)。

簡(jiǎn)單的介紹一下使用到庫(kù)

  • pymysql:用來(lái)鏈接操作數(shù)據(jù)庫(kù)
  • xlwt :導(dǎo)出xls文件
  • pandas:數(shù)據(jù)清理,及插入(非常強(qiáng)大的一個(gè)庫(kù))

話不多說(shuō)直接上代碼

#!/usr/bin/python3


import pymysql
import requests
import json
import xlwt
import os
import shutil

import pandas as pd

import qtui.dao.path as path


from sqlalchemy import create_engine


class DatabaseAccess():

    def __init__(self) -> None:
        print("初始成功,數(shù)據(jù)庫(kù)連接信息")
        # self.isConnectionOpen()
        self.__db_host = "localhost"
        self.__db_port = 3306
        self.__db_user = "root"
        self.__db_password = "123456"
        self.__db_database = "storedb"

    # 爬取數(shù)據(jù)
    def getdataforurl(self):
        print("爬取數(shù)據(jù),自己寫吧,我扒的就不展示了")


    # 鏈接數(shù)據(jù)庫(kù)
    def isConnectionOpen(self):
        self.__db = pymysql.connect(
            host=self.__db_host,
            port=self.__db_port,
            user=self.__db_user,
            password=self.__db_password,
            database=self.__db_database,
            charset='utf8'
        )

    # 插入數(shù)據(jù)
    def linesinsert(self, data):
        try:
            # 連接數(shù)據(jù)庫(kù)
            self.isConnectionOpen()
            # 使用 cursor() 方法創(chuàng)建一個(gè)游標(biāo)對(duì)象 cursor
            cursor = self.__db.cursor()
            # 插入數(shù)據(jù)語(yǔ)句 簡(jiǎn)單的sql語(yǔ)句 插入數(shù)據(jù)庫(kù)(后面用pandas好像更方便在handle_data方法中)

            # 軟件供應(yīng)
            # query = """insert into software (id, software_introduction, applicable_industries,category,contacts,display_title,mail_box,qq,software_price,team_city,team_joining_time,team_logo,team_name,telephone,vps_table_name) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
            # values = (data["id"], data["software_introduction"], data["applicable_industries"], data["category"], data["contacts"], data["display_title"], data["mail_box"],
            #           data["qq"], data["software_price"], data["team_city"], data["team_joining_time"], data["team_logo"], data["team_name"], data["telephone"], data["vps_table_name"])
            # 找團(tuán)隊(duì)
            # query = """insert into team (id, case_work, city,contacts,display_title,image,joining_time,mail_box,qq,specialty,team_introduction,telephone,vps_table_name,name_of_employer) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
            # values = (data["id"], data["case_work"], data["city"], data["contacts"], data["display_title"], data["image"], data["joining_time"], data["mail_box"],
            #           data["qq"], data["specialty"], data["team_introduction"],  data["telephone"], data["vps_table_name"], "Sky666")
            # 找項(xiàng)目
            query = """insert into project (id,  city,display_title,deadline,development_cycle,project_budget,employer_joining_time,end_of_bidding,item_no,project_classification,project_introduction,project_status,release_time,vps_table_name,image_header,name_of_employer) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
            values = (data["id"], data["city"], data["display_title"], data["deadline"], data["development_cycle"], data["project_budget"],
                      data["employer_joining_time"], data["end_of_bidding"], data["item_no"],  data["project_classification"], data["project_introduction"], data["project_status"], data["release_time"], data["vps_table_name"], "http://www.hfwish.com/theme/wrf_wbfb_v2/images/1-191031162301Y9.jpg", "Sky666")
            cursor.execute(query, values)
        except Exception as e:
            print(e)
        finally:
            # 關(guān)閉數(shù)據(jù)庫(kù)連接
            cursor.close()
            self.__db.commit()
            self.__db.close()

    # 查詢所有表的名稱
    def check_table_name(self):
        print("查詢表名中....")

        results = []
        try:
            self.isConnectionOpen()
            with self.__db.cursor() as cursor:

                sql = '''SHOW TABLES'''
                cursor.execute(sql)
                result = cursor.fetchall()
                for i in range(len(result)):
                    results.append(result[i][0])
        except Exception as e:
            print(e)
        finally:
            # 關(guān)閉數(shù)據(jù)庫(kù)連接
            cursor.close()
            self.__db.commit()
            self.__db.close()
        print("查詢成功==========================")
        return results
    # 根據(jù)表名導(dǎo)出示例模板

    def export(self, table_name):
        print("導(dǎo)出模板")
        try:
            # 連接數(shù)據(jù)庫(kù)
            self.isConnectionOpen()
            # 使用 cursor() 方法創(chuàng)建一個(gè)游標(biāo)對(duì)象 cursor
            cursor = self.__db.cursor()
            sql = 'select * from %s;' % table_name
            cursor.execute(sql)  # 執(zhí)行sql
            fileds = [filed[0] for filed in cursor.description]  # 所有的字段
            all_data = cursor.fetchall()
            book = xlwt.Workbook(encoding='utf-8')

            # 背景色--淡綠色
            patternLightGreen = xlwt.Pattern()
            patternLightGreen.pattern = xlwt.Pattern.SOLID_PATTERN
            patternLightGreen.pattern_fore_colour = 42
            styleLightGreen = xlwt.XFStyle()
            styleLightGreen.pattern = patternLightGreen

            sheet = book.add_sheet(table_name)
            for col, filed in enumerate(fileds):  # 寫表頭的,
                sheet.write(0, col, filed, styleLightGreen)  # xls表頭
            # print(all_data)
            if len(all_data) > 0:
                row = 1  # 第一行
                for col, filed in enumerate(all_data[len(all_data)-1]):  # 控制列
                    sheet.write(row, col, filed)
            book.save('%s模板.xls' % table_name)
            bPath = os.getcwd()
            # 獲取當(dāng)前文件路徑
            file_path = os.path.join(bPath, '%s模板.xls' %
                                     table_name)
            # 移動(dòng)文件到E盤地方
            target_path = os.path.join(path.GetDir.get_BASE_DIR3("dataHook"))
            # 使用shutil包的move方法移動(dòng)文件
            shutil.move(file_path, target_path)
            print("導(dǎo)出%s表模板成功!" % table_name,
                  "------------------------------------------文件路勁:E:\ICP\ICP-Date\dataHook")
        finally:
            # 關(guān)閉數(shù)據(jù)庫(kù)連接
            cursor.close()
            self.__db.commit()
            self.__db.close()

    # 根據(jù)表名導(dǎo)出所有數(shù)據(jù)
    def exportAll(self, table_name):
        print("導(dǎo)出表結(jié)構(gòu)")
        try:
            # 連接數(shù)據(jù)庫(kù)
            self.isConnectionOpen()
            # 使用 cursor() 方法創(chuàng)建一個(gè)游標(biāo)對(duì)象 cursor
            cursor = self.__db.cursor()
            sql = 'select * from %s;' % table_name
            cursor.execute(sql)  # 執(zhí)行sql
            fileds = [filed[0] for filed in cursor.description]  # 所有的字段
            all_data = cursor.fetchall()
            book = xlwt.Workbook(encoding='utf-8')

            # 背景色--淡綠色
            patternLightGreen = xlwt.Pattern()
            patternLightGreen.pattern = xlwt.Pattern.SOLID_PATTERN
            patternLightGreen.pattern_fore_colour = 42
            styleLightGreen = xlwt.XFStyle()
            styleLightGreen.pattern = patternLightGreen

            sheet = book.add_sheet('sheet1')
            for col, filed in enumerate(fileds):  # 寫表頭的,
                sheet.write(0, col, filed, styleLightGreen)  # xls表頭
            # print(all_data)
            row = 1  # 第一行
            for data in all_data:  # 行
                for col, filed in enumerate(data):  # 控制列
                    sheet.write(row, col, filed)
                row += 1  # 每次寫完一行,行數(shù)加一

            # print("導(dǎo)出%s表成功!" % table_name,
            #       "------------------------------------------")
            book.save('%s.xls' % table_name)

            bPath = os.getcwd()

            # 獲取當(dāng)前文件路徑
            file_path = os.path.join(bPath, '%s.xls' % table_name)
            # 移動(dòng)文件到E盤地方

            target_path = os.path.join(path.GetDir.get_BASE_DIR3("dataHook"))
            # 使用shutil包的move方法移動(dòng)文件
            shutil.move(file_path, target_path)
            print("導(dǎo)出%s表成功!" % table_name,
                  "------------------------------------------文件路勁:E:\ICP\ICP-Date\dataHook")
        finally:
            # 關(guān)閉數(shù)據(jù)庫(kù)連接
            cursor.close()
            self.__db.commit()
            self.__db.close()

    # 查詢數(shù)據(jù)
    def check_date(self):
        # 重新建立數(shù)據(jù)庫(kù)連接
        self.isConnectionOpen()
        cursor = self.__db.cursor()
        # 查詢數(shù)據(jù)庫(kù)并打印內(nèi)容
        cursor.execute('''select * from catering_sale''')
        results = cursor.fetchall()
        for row in results:
            print(row)
        # 關(guān)閉
        cursor.close()
        self.__db.commit()
        self.__db.close()

    # 讀取xls
    def read_excl(self, table_name):
        print("開始讀取文件")
        allFile = []
        try:
            allFile = path.GetDir.file_name(
                path.GetDir.get_BASE_DIR3("inserData"), table_name+"模板")
        except Exception as e:
            print("讀取文件報(bào)錯(cuò)==========================")
            print(e)
        finally:

            if len(allFile) == 0:
                print("未查詢到%s模板.xls文件" % table_name, "請(qǐng)檢查inserData文件夾")
                return False
            else:
                print("所有%s.xls文件路徑" % table_name, allFile)
            # select = input("查詢到多個(gè)文件,請(qǐng)選擇第幾個(gè)? ")
                return allFile[0]

    # 讀取根據(jù)文件路徑處理數(shù)據(jù)
    def handle_data(self, paths, name):
        # 開始清理數(shù)據(jù)==============================
        print("開始清理數(shù)據(jù)==============================path:", paths)
        rPath = os.path.join(paths)
        print("rPath", rPath)
        df = pd.read_excel(rPath)  # 使用pd讀取數(shù)據(jù)

        # 清洗數(shù)據(jù): 沒有列頭, 缺失值,空行,重復(fù)數(shù)據(jù),非ASCII 字符(沒弄),
        df.dropna(axis=0, how='any', inplace=True)  # 刪除有空行
        df.drop_duplicates("id", "first", inplace=True)  # 刪除Id重復(fù)數(shù)據(jù)0

        print(df.to_string())

        print("清理數(shù)據(jù)完成===================================最終數(shù)據(jù)")

        print("開始插入數(shù)據(jù)庫(kù)=================================START")

        try:
            self.isConnectionOpen()
            db_info = {"host": self.__db_host,
                       "port": self.__db_port,
                       "user": self.__db_user,
                       "password": self.__db_password,
                       "database": self.__db_database,
                       "charset": 'utf8'}

            engine = create_engine(
                'mysql+pymysql://%(user)s:%(password)s@%(host)s:%(port)d/%(database)s?charset=utf8' % db_info, encoding='utf-8')
            df.to_sql(name, con=engine,
                      if_exists='append', index=False)

            target_path = os.path.join(path.GetDir.get_BASE_DIR3("finalData"))
            target_path = target_path + "%s.csv" % name
            df.to_csv(target_path, encoding='utf-8', index=False)  # 寫成csv文件保存

        except Exception as e:
            print(e, "異常數(shù)據(jù)")

        finally:
            # 關(guān)閉數(shù)據(jù)庫(kù)連接

            self.__db.commit()
            self.__db.close()
            print("插入數(shù)據(jù)結(jié)束")


if __name__ == "__main__":
    # 創(chuàng)建實(shí)例化對(duì)象
    db = DatabaseAccess()
    # db.linesinsert()
    # db.check_date()
    # db.getdataforurl()

    one = input("是否有匹配的模板數(shù)據(jù)表請(qǐng)輸入是/否:")
    if one == '是':
        table_name = db.check_table_name()
        print("當(dāng)前表:", table_name)
        name = input("輸入你要導(dǎo)入的表名稱:")
        paths = db.read_excl(name)
        if paths == False:
            print("-------------需要把對(duì)應(yīng)表的數(shù)據(jù).xls格式模板放入inserData文件夾下------------------------")
        else:
            db.handle_data(paths, name)
    else:
        table_name = db.check_table_name()
        print("當(dāng)前表:", table_name)
        name = input("輸入你要導(dǎo)出模板的表名稱:")
        db.export(name)

    # table_name = db.check_table_name()
    # print("當(dāng)前表:", table_name)
    # name = input("選擇你要導(dǎo)出的表名稱:")

    # 根據(jù)表名導(dǎo)出示例模板
    # db.export(name)
    # 根據(jù)表名導(dǎo)出說(shuō)有數(shù)據(jù)
    # db.exportAll("team")

    # 根據(jù)表名插入數(shù)據(jù)
    # paths = db.read_excl("team")
    # db.handle_data(paths, "team")

    # f = open(r"e:/ICP/ICP-Date/dataHook/team.xls", 'rb')
    # its_code = f.read()
    # print(chardet.detect(its_code))

哭~~~ 后來(lái)產(chǎn)品說(shuō)這個(gè)他沒環(huán)境怎么跑。沒辦法基于這個(gè)幾個(gè)簡(jiǎn)單的功能,做了一版帶GUI的程序。

晚點(diǎn)把鏈接放出來(lái)。。。。。

最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡(jiǎn)書系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容