字符计数

import os
import json
from collections import Counter

# Count how often each individual character occurs in the transcriptions.
label_dir = "/Users/thy/Downloads/chinese20240613"


def parse_label_line(line):
    """Return the transcription strings stored in one label line.

    The line is split on tabs and its second field is parsed as a JSON list
    of objects; the value under each object's "transcription" key is
    returned, in order.  The first field is not used here (presumably the
    image path -- confirm against the labelling tool).

    Raises:
        IndexError: if the line contains no tab-separated second field.
        json.JSONDecodeError: if the second field is not valid JSON.
        KeyError: if an item has no "transcription" key.
    """
    fields = line.strip("\r\n").split("\t")
    annotations = json.loads(fields[1])
    return [item["transcription"] for item in annotations]


def collect_characters(lines):
    """Return every character of every transcription, in file order.

    Args:
        lines: an iterable of label lines (e.g. an open text file).

    Returns:
        A flat list with one entry per character; empty transcriptions
        contribute nothing.
    """
    zi_ls = []
    for line in lines:
        # Blank lines (typically a trailing newline at the end of the file)
        # carry no annotation and would otherwise fail on the field lookup.
        if not line.strip():
            continue
        transcriptions = parse_label_line(line)
        print(transcriptions)
        for text in transcriptions:
            # Iterating a str yields its characters one by one, so one- and
            # multi-character transcriptions need no separate handling.
            zi_ls.extend(text)
    return zi_ls


def report_character_counts():
    """Read Label.txt from ``label_dir`` and print per-character counts.

    Returns:
        The ``Counter`` mapping each character to its number of occurrences.
    """
    # Explicit encoding: without it the platform default is used, which is
    # not UTF-8 everywhere and then garbles or rejects the Chinese text.
    # NOTE(review): assumes Label.txt is UTF-8 encoded -- confirm.
    with open(os.path.join(label_dir, "Label.txt"), encoding="utf-8") as f:
        zi_ls = collect_characters(f)

    char_counts = Counter(zi_ls)

    # Print one line per distinct character.
    for char, count in char_counts.items():
        print(f"字符 '{char}' 出现了 {count} 次")

    # char_counts is a dict subclass and can be used directly as the result
    # dictionary; print its full content.
    print(char_counts)
    return char_counts


# Only run the report when executed as a script, so the helpers above can be
# imported without touching the filesystem.
if __name__ == "__main__":
    report_character_counts()

字符串出现的次数

import os
import json
from collections import Counter

# Count how often each whole transcription string occurs (the strings are
# NOT split into characters here).
label_dir = "/Users/thy/Downloads/chinese20240613"


def read_transcriptions(line):
    """Return the transcription strings stored in one label line.

    The line is split on tabs and its second field is parsed as a JSON list
    of objects; the value under each object's "transcription" key is
    returned, in order.  The first field is not used here (presumably the
    image path -- confirm against the labelling tool).

    Raises:
        IndexError: if the line contains no tab-separated second field.
        json.JSONDecodeError: if the second field is not valid JSON.
        KeyError: if an item has no "transcription" key.
    """
    fields = line.strip("\r\n").split("\t")
    annotations = json.loads(fields[1])
    return [item["transcription"] for item in annotations]


def collect_transcriptions(lines):
    """Return every transcription string from all lines, in file order.

    Args:
        lines: an iterable of label lines (e.g. an open text file).

    Returns:
        A flat list with one entry per annotation item.
    """
    zi_ls = []
    for line in lines:
        # Blank lines (typically a trailing newline at the end of the file)
        # carry no annotation and would otherwise fail on the field lookup.
        if not line.strip():
            continue
        transcriptions1 = read_transcriptions(line)
        print(transcriptions1)
        zi_ls.extend(transcriptions1)
    return zi_ls


def report_transcription_counts():
    """Read Label.txt from ``label_dir`` and print per-string counts.

    Returns:
        The ``Counter`` mapping each transcription string to its number of
        occurrences.
    """
    # Explicit encoding: without it the platform default is used, which is
    # not UTF-8 everywhere and then garbles or rejects the Chinese text.
    # NOTE(review): assumes Label.txt is UTF-8 encoded -- confirm.
    with open(os.path.join(label_dir, "Label.txt"), encoding="utf-8") as f:
        zi_ls = collect_transcriptions(f)

    # Show the distinct strings before the per-string tallies.
    print("出现的字符串:", set(zi_ls))
    char_counts = Counter(zi_ls)

    # Print one line per distinct transcription string.
    for char, count in char_counts.items():
        print(f"字符 '{char}' 出现了 {count} 次")

    # char_counts is a dict subclass and can be used directly as the result
    # dictionary; print its full content.
    print(char_counts)
    return char_counts


# Only run the report when executed as a script, so the helpers above can be
# imported without touching the filesystem.
if __name__ == "__main__":
    report_transcription_counts()

Logo

AtomGit 是由开放原子开源基金会联合 CSDN 等生态伙伴共同推出的新一代开源与人工智能协作平台。平台坚持“开放、中立、公益”的理念,把代码托管、模型共享、数据集托管、智能体开发体验和算力服务整合在一起,为开发者提供从开发、训练到部署的一站式体验。

更多推荐