"""Convert sentence/vocabulary rows of an Excel sheet into ``final_data.json``.

Each processed row is expected to carry, in columns 0-4: a sentence id, the
sentence split by "|", a grammar note, a "|"-delimited core-word list and a
"|"-delimited topic word list.  Every word in the two lists occupies eight
consecutive fields; six of those fields may hold several "$"-separated values.
"""
import json

# Number of consecutive "|"-separated fields that describe one word.
FIELDS_PER_WORD = 8

# Output keys for fields 2..7 of a word group, in order (part of speech,
# mnemonic, collocations, synonyms, antonyms, same-root words).  Each of
# these becomes a list when the field contains "$".
MULTI_VALUE_KEYS = ("词性", "记忆", "搭配", "同义", "反义", "同根")

# Text that ends a topic heading in the first field of a topic-word group.
TOPIC_MARKER = "的词"


def _split_multi(text):
    """Strip spaces/newlines from *text*; split it on "$" if it contains one.

    Returns the stripped string, or a list of space-stripped parts when the
    stripped string contains "$".
    """
    text = text.strip(" \n")
    if "$" in text:
        return [part.strip(" ") for part in text.split("$")]
    return text


def _word_groups(cell, column_name):
    """Yield the eight-field groups contained in a "|"-delimited *cell*.

    Raises:
        ValueError: if the cell does not hold a whole number of groups.
    """
    # Whatever follows the last "|" is discarded (normally the empty string
    # left by a trailing delimiter).
    fields = cell.split("|")[:-1]
    if len(fields) % FIELDS_PER_WORD:
        raise ValueError(
            "column %r holds %d fields, which is not a multiple of %d"
            % (column_name, len(fields), FIELDS_PER_WORD)
        )
    for start in range(0, len(fields), FIELDS_PER_WORD):
        yield fields[start:start + FIELDS_PER_WORD]


def _word_entry(group, word_id, word, topic=None):
    """Build the output dict for one word from its eight-field *group*.

    The "主题" key is emitted first, and only when *topic* is given, so the
    key order of the JSON output matches the original script.
    """
    entry = {}
    if topic is not None:
        entry["主题"] = topic
    entry["word_id"] = word_id
    entry["word"] = word
    entry["pron"] = group[1].strip(" \n")
    for offset, key in enumerate(MULTI_VALUE_KEYS, start=2):
        entry[key] = _split_multi(group[offset])
    return entry


def convert_rows(rows, first_word_id=1):
    """Turn spreadsheet rows into the list of sentence dicts that is dumped.

    Args:
        rows: iterable of row value sequences; indexes 0-4 are read and
            indexes 1, 3 and 4 must be strings.
        first_word_id: id given to the first word; ids then increase by one
            per word across all rows, core words before topic words.

    Returns:
        A list with one dict per row, keyed "sentence_id", "sentence",
        "语法笔记", "核心词表" and "主题归纳".

    Raises:
        ValueError: if a word list is not a whole number of eight-field
            groups, or a topic word appears before any topic heading.
    """
    sentences = []
    word_id = first_word_id
    # The current topic is deliberately NOT reset per row: a group without a
    # heading inherits the most recent one, even from an earlier row.
    topic = None

    for row in rows:
        sentence = {
            "sentence_id": row[0],
            "sentence": row[1].split("|"),
            "语法笔记": row[2],
        }

        core_words = []
        for group in _word_groups(row[3], "核心词表"):
            word = group[0].strip(" \n")
            core_words.append(_word_entry(group, word_id, word))
            word_id += 1
        sentence["核心词表"] = core_words

        topic_words = []
        for group in _word_groups(row[4], "主题归纳"):
            head = group[0]
            if TOPIC_MARKER in head:
                # "<topic>的词: <word>" -> keep the heading including the marker.
                topic = head.split(TOPIC_MARKER)[0].strip(" \n") + TOPIC_MARKER
            if topic is None:
                raise ValueError(
                    "topic word %r appears before any %r heading"
                    % (head, TOPIC_MARKER)
                )
            # Without a heading the split returns the field itself.
            word = head.split(TOPIC_MARKER)[-1].strip(": \n")
            topic_words.append(_word_entry(group, word_id, word, topic))
            word_id += 1
        sentence["主题归纳"] = topic_words

        sentences.append(sentence)

    return sentences


def main(
    xlsx_path="C:\\Users\\zcb\\Desktop\\data.xlsx",
    sheet_name="Sheet1",
    start_row=51,
    stop_row=59,
    out_path="final_data.json",
):
    """Read rows ``start_row`` .. ``stop_row - 1`` of the sheet and write JSON.

    Args:
        xlsx_path: workbook to open with ``xlrd.open_workbook``.
        sheet_name: name of the sheet to read.
        start_row: first row index passed to ``sheet.row_values``.
        stop_row: row index at which reading stops (exclusive).
        out_path: destination file, written as UTF-8 without ASCII escaping.
    """
    # Imported here so the parsing helpers stay usable without xlrd installed.
    # NOTE(review): xlrd 2.0+ reportedly reads only .xls files; confirm the
    # installed version can open this .xlsx workbook.
    import xlrd

    workbook = xlrd.open_workbook(xlsx_path)
    sheet = workbook.sheet_by_name(sheet_name)
    rows = (sheet.row_values(row_index) for row_index in range(start_row, stop_row))
    final_data = convert_rows(rows)

    with open(out_path, "w", encoding="utf8") as f:
        json.dump(final_data, f, ensure_ascii=False)


if __name__ == "__main__":
    main()
data.zip: https://files.cnblogs.com/files/zach0812/data.zip
原文地址:https://www.cnblogs.com/zach0812/p/12243558.html
时间: 2024-11-06 09:36:00