# Count how often each education requirement (学历要求) appears in the job
# data and plot the distribution as a bar chart.
# NOTE(review): relies on `df` (DataFrame with a '学历要求' column), `plt`
# (matplotlib.pyplot), `city` and `kind` being defined by the surrounding
# script — confirm against the full file.
edu_counts = {}  # renamed: the original shadowed the builtin `dict`
for level in df['学历要求']:
    # BUGFIX: the first occurrence must count as 1, not 0. The original did
    # `dict[i] = 0` on first sight and only incremented afterwards, so every
    # category was undercounted by exactly one.
    edu_counts[level] = edu_counts.get(level, 0) + 1

index = list(edu_counts.keys())
print('index:', index)
num = [edu_counts[level] for level in index]
print('num:', num)

plt.bar(index, num, width=0.5)
plt.savefig(city + '_' + kind + '_' + '学历要求.png')
plt.show()
时间: 2023-06-24 21:05:55 浏览: 145
这段代码的作用是读取一个数据集中的学历要求列,统计每个学历要求出现的次数,并将其可视化为柱状图。其中,`df`是数据集,`city`和`kind`分别表示城市和工作类型。下面是代码的详细解释:
- `dict = {}`:定义一个空字典`dict`,用于存储每个学历要求出现的次数。
- `for i in df['学历要求']:`:遍历数据集中的学历要求列,对于每个学历要求进行统计。
- `if i not in dict.keys(): dict[i] = 0 else: dict[i] += 1`：如果该学历要求不在字典`dict`中，则将其加入字典，并设置其出现次数为0；否则将其出现次数加1。注意：这里存在一个差一（off-by-one）错误——首次出现被记为 0 而不是 1，导致每个学历要求的统计值都比实际少 1；正确写法是首次出现时 `dict[i] = 1`，或直接使用 `collections.Counter` 统计。
- `index = list(dict.keys())`:获取所有学历要求作为横坐标。
- `num = []`:定义一个空列表`num`,用于存储每个学历要求出现的次数。
- `for i in index: num.append(dict[i])`:遍历所有学历要求,将其出现次数加入列表`num`中。
- `plt.bar(index, num, width=0.5)`:绘制柱状图,横坐标为学历要求,纵坐标为出现次数,每个柱子的宽度为0.5。
- `plt.savefig(city+'_'+kind+'_'+'学历要求.png')`:保存柱状图,以城市、工作类型和学历要求作为文件名。
- `plt.show()`:显示柱状图。
相关问题
# Incremental ("INC") vs. full load of the incoming batch `df`.
# NOTE(review): this is the body of an enclosing method whose `def` line is
# not visible here; `df`, `primary_keys_list` and `param_dict` come from that
# method's scope — confirm against the full class.
if self.config.load_type == "INC":
    # adhoc hist job do not need to join landing merge table
    try:
        # Previously landed/merged data, read back from the destination path.
        landing_merge_df = self.spark.read.format(self.config.destination_file_type). \
            load(self.config.destination_data_path)
        # Strip audit columns from the incoming batch so the joins below
        # compare business columns only.
        df = df.drop("audit_batch_id", "audit_job_id", "audit_src_sys_name",
                     "audit_created_usr", "audit_updated_usr",
                     "audit_created_tmstmp", "audit_updated_tmstmp")
        # New records: keys present in the incoming batch but absent from the
        # landing merge table (left_anti on the primary keys).
        new_insert_df = df.join(landing_merge_df, primary_keys_list, "left_anti")
        self.logger.info(f"new_insert_df count: {new_insert_df.count()}")
        new_insert_df = DataSink_with_audit(self.spark).add_audit_columns(new_insert_df, param_dict)
        # Updated records: keys present on both sides. Keep the incoming
        # business columns ("l.*") plus the original audit columns from the
        # landing side ("lm.*"), then refresh the audit values.
        update_df = df.alias('l').join(landing_merge_df.alias('lm'), on=primary_keys_list, how="inner")
        update_df = update_df.select("l.*", "lm.audit_batch_id", "lm.audit_job_id",
                                     "lm.audit_src_sys_name", "lm.audit_created_usr",
                                     "lm.audit_updated_usr", "lm.audit_created_tmstmp",
                                     "lm.audit_updated_tmstmp")
        self.logger.info(f"update_df count : {update_df.count()}")
        update_df = DataSink_with_audit(self.spark).update_audit_columns(update_df, param_dict)
        # Unchanged records: rows of the landing merge table whose keys do
        # not appear in the incoming batch.
        unchanged_df = landing_merge_df.join(df, on=primary_keys_list, how="left_anti")
        self.logger.info(f"unchanged_records_df count : {unchanged_df.count()}")
        # NOTE(review): union() matches columns by position — assumes all
        # three frames share the same column order; confirm upstream.
        final_df = new_insert_df.union(update_df).union(unchanged_df)
        print("final_df count : ", final_df.count())
    except AnalysisException as e:
        # First incremental run: the destination path does not exist yet, so
        # treat the whole batch as new inserts.
        if e.desc.startswith('Path does not exist'):
            self.logger.info('landing merge table not exists. will skip join landing merge')
            final_df = DataSink_with_audit(self.spark).add_audit_columns(df, param_dict)
        else:
            self.logger.error(f'unknown error: {e.desc}')
            raise e
else:
    # Non-incremental load: no merge needed; just add audit columns.
    final_df = DataSink_with_audit(self.spark).add_audit_columns(df, param_dict)
return final_df
这是一段Python代码,其中包含一个类方法的实现。该方法根据配置参数的不同,从一个特定的数据路径中将数据加载到一个Spark DataFrame中,并对该数据进行一些操作,最终返回一个具有审计列的DataFrame。如果配置参数是"INC",则会执行一些数据合并的操作,包括添加、更新和未更改的记录,并对这些记录添加审计列。如果配置参数是其他值,则只会添加审计列。
def classification_report_to_dict(report):
    """Parse the text output of sklearn's ``classification_report`` into a dict.

    Returns ``{row_label: {'precision': float, 'recall': float,
    'f1-score': float, 'support': int}}`` including the 'macro avg' /
    'micro avg' / 'weighted avg' rows when present.

    BUGFIX: the original assumed each class name sat alone on its own line
    (``len(t) == 1``). In a real sklearn report every data line holds the
    label *and* its four metrics on the same line, so ``class_name`` was
    referenced before assignment on the very first data line — this is the
    ``local variable 'class_name' referenced before assignment`` error.
    """
    lines = [line.strip() for line in report.split('\n') if line.strip()]
    class_dict = {}
    # lines[0] is the "precision recall f1-score support" header — skip it.
    for line in lines[1:]:
        t = line.split()
        if len(t) < 5:
            # e.g. the 'accuracy' row carries only two numbers — skip it.
            continue
        # The last four tokens are the metrics; everything before them is the
        # label (labels such as 'macro avg' or multi-word class names contain
        # spaces, so join them back together).
        name = ' '.join(t[:-4])
        class_dict[name] = {
            'precision': float(t[-4]),
            'recall': float(t[-3]),
            'f1-score': float(t[-2]),
            'support': int(t[-1]),
        }
    return class_dict


def classification_report_from_dict(report_dict):
    """Render a dict from ``classification_report_to_dict`` back to text.

    Per-class rows come first, then the '... avg' summary rows.
    BUGFIX: the original called ``classes.remove('macro avg')`` /
    ``classes.remove('micro avg')`` unconditionally, raising ``ValueError``
    when a row was absent (newer sklearn emits 'weighted avg' instead of
    'micro avg'); filtering by suffix handles every variant.
    """
    avg_names = [k for k in report_dict if k.endswith('avg')]
    classes = [k for k in report_dict if not k.endswith('avg')]

    def _row(name):
        # One formatted report line for a class or an average row.
        m = report_dict[name]
        return (f"{name.ljust(15)}{m['precision']:.2f} {m['recall']:.2f} "
                f"{m['f1-score']:.2f} {int(m['support']):5d}\n")

    lines = ['              precision    recall  f1-score   support\n\n']
    for class_name in classes:
        lines.append(_row(class_name))
    lines.append('\n')
    for avg_name in avg_names:
        lines.append(_row(avg_name))
    return ''.join(lines)


# --- driver fragment from the question --------------------------------------
# NOTE(review): `report` (a list of report strings) and `pd` come from the
# surrounding script — confirm against the full file.
report_dict = {}  # BUGFIX: was used below without ever being initialized
for i, single_report in enumerate(report):  # avoid rebinding `report` mid-loop
    report_dict[f'report_{i + 1}'] = classification_report_to_dict(single_report)
report_df = pd.DataFrame.from_dict(report_dict, orient='index')
# NOTE(review): the cells of report_df are dicts, so .mean() will not average
# the individual metrics — this mirrors the original code; verify before
# relying on the result.
avg_report_dict = report_df.mean().to_dict()
avg_report_str = classification_report_from_dict(avg_report_dict) print(avg_report_str)出现local variable 'class_name' referenced before assignment怎么解决
这个错误的真正原因在 `classification_report_to_dict` 函数中：该函数假设类别名单独占一行（`len(t) == 1` 时才给 `class_name` 赋值），但 sklearn 的 `classification_report` 输出中每一行数据同时包含类别名和四个指标（split 后至少 5 个字段），因此 `len(t) == 1` 永远不成立，第一行数据就会进入 `else` 分支，而此时 `class_name` 还没有被赋值，于是抛出 `local variable 'class_name' referenced before assignment`。解决方法是按"最后四个字段是指标、其余字段是类别名"的方式解析每一行，例如 `name = ' '.join(t[:-4])`，而不要依赖单独的类别名行。另外还有两个相关问题需要一并修复：循环外的 `report_dict` 在使用前需要先初始化为 `{}`；`classification_report_from_dict` 中的 `classes.remove('micro avg')` 在该行不存在时（新版 sklearn 输出的是 'weighted avg'）会抛出 `ValueError`，应改为按是否以 'avg' 结尾来过滤。
阅读全文