Python 中文数字转换
都什么年代了连个好的 Python 数字转中文的程序都找不到。所以只能重新发明轮子了。
这个程序按照四位一组的划分,倒序拼写数字,准确处理补零、十几、小数的问题,支持大写、繁体、〇、两的写法,整数部分最多可达 48 位。由于浮点数类型的精度限制,长整数和小数应以 int、Decimal 或字符串类型传入。唯一的缺点是不支持浮点数中的科学记数法(可以自己分段传入)。Python 2 只需将所有字符串改成 unicode 即可使用。
授权:WTFPL 或 Unlicense 或 CC0,即公有领域授权,想干嘛干嘛。Gist 链接
import itertools
def num2chinese(num, big=False, simp=True, o=False, twoalt=False):
    """
    Convert a number to its Chinese numeral representation.

    The number is rendered from ``str(num)``: the integer part is cut into
    four-digit groups starting from the least significant digit, each group
    is spelled out with its magnitude unit, and the fractional part is
    spelled digit by digit.

    `num`   : the number to convert (anything whose ``str()`` is a plain
              decimal literal, optionally signed, e.g. int, Decimal, str).
    `big`   : use financial characters.
    `simp`  : use simplified characters instead of traditional characters.
    `o`     : use 〇 for zero.
    `twoalt`: use 两/兩 for two when appropriate.

    Note that `o` and `twoalt` are ignored when `big` is used,
    and `twoalt` is ignored when `o` is used for formal representations.

    Returns the Chinese numeral string. An explicit leading ``+`` or ``-``
    in ``str(num)`` is rendered as 正 / 负 (負).

    Raises ValueError if the absolute value is >= 1e48, if the text uses
    scientific notation, or if it is not a valid decimal literal.
    """
    nd = str(num)
    # float() both rejects non-numeric text and gives a magnitude to test.
    if abs(float(nd)) >= 1e48:
        raise ValueError('number out of range')
    # str(Decimal) spells the exponent with an upper-case 'E', so the check
    # has to be case-insensitive.
    if 'e' in nd.lower():
        raise ValueError('scientific notation is not supported')

    c_symbol = '正负点' if simp else '正負點'
    if o:  # formal
        twoalt = False
    if big:
        c_basic = '零壹贰叁肆伍陆柒捌玖' if simp else '零壹貳參肆伍陸柒捌玖'
        c_unit1 = '拾佰仟'
        c_twoalt = '贰' if simp else '貳'
    else:
        c_basic = '〇一二三四五六七八九' if o else '零一二三四五六七八九'
        c_unit1 = '十百千'
        if twoalt:
            c_twoalt = '两' if simp else '兩'
        else:
            c_twoalt = '二'
    c_unit2 = '万亿兆京垓秭穰沟涧正载' if simp else '萬億兆京垓秭穰溝澗正載'
    zero = c_basic[0]

    def squeeze_zeros(text):
        """Collapse every run of consecutive zero characters into one."""
        while zero * 2 in text:
            text = text.replace(zero * 2, zero)
        return text

    result = []
    if nd[0] == '+':
        result.append(c_symbol[0])
    elif nd[0] == '-':
        result.append(c_symbol[1])
    integer, _, remainder = nd.lstrip('+-').partition('.')

    # An empty integer part (e.g. '.5') is treated as zero.
    if integer and int(integer):
        # Four-digit groups, least significant group first.
        groups = [integer[max(i - 4, 0):i]
                  for i in range(len(integer), 0, -4)]
        intresult = []
        for nu, unit in enumerate(groups):
            value = int(unit)
            if value == 0:  # 0000
                intresult.append(zero)
                continue
            if nu > 0 and value == 2:  # 0002
                # The leading zero stands for the skipped digits above the
                # 2; it is stripped below when this is the top group.
                intresult.append(zero + c_twoalt + c_unit2[nu - 1])
                continue
            ulist = []
            unit = unit.zfill(4)
            # Walk the group from the ones digit (nc == 0) upwards.
            for nc, ch in enumerate(reversed(unit)):
                if ch == '0':
                    if ulist:  # ???0: trailing zeros are not spelled
                        ulist.append(zero)
                elif nc == 0:
                    ulist.append(c_basic[int(ch)])
                elif nc == 1 and ch == '1' and unit[1] == '0':
                    # special case for tens
                    # edit the 'elif' if you don't like
                    # 十四, 三千零十四, 三千三百一十四
                    ulist.append(c_unit1[0])
                elif nc > 1 and ch == '2':
                    ulist.append(c_twoalt + c_unit1[nc - 1])
                else:
                    ulist.append(c_basic[int(ch)] + c_unit1[nc - 1])
            ustr = squeeze_zeros(''.join(reversed(ulist)))
            if nu == 0:
                intresult.append(ustr)
            else:
                intresult.append(ustr + c_unit2[nu - 1])
        # Squeeze at character level: a zero group next to a group that has
        # its own leading zero would otherwise produce a doubled zero.
        joined = squeeze_zeros(''.join(reversed(intresult)))
        result.append(joined.strip(zero))
    else:
        result.append(zero)
    if remainder:
        result.append(c_symbol[2])
        result.append(''.join(c_basic[int(ch)] for ch in remainder))
    return ''.join(result)
举例:
-1570712237615.5527 → 负一兆五千七百零七亿一千二百二十三万七千六百一十五点五五二七
110184432904.79883 → 一千一百零一亿八千四百四十三万二千九百零四点七九八八三
+4530692101492.777 → 正四兆五千三百零六亿九千二百一十万一千四百九十二点七七七
-8274510741923.016 → 负八兆两千七百四十五亿一千零七十四万一千九百二十三点零一六
744287603383.8828 → 柒仟肆佰肆拾贰亿捌仟柒佰陆拾万叁仟叁佰捌拾叁点捌捌贰捌
-8863371530966.008 → 负捌兆捌仟陆佰叁拾叁亿柒仟壹佰伍拾叁万零玖佰陆拾陆点零零捌
5096195844576.447 → 伍兆零玖佰陸拾壹億玖仟伍佰捌拾肆萬肆仟伍佰柒拾陸點肆肆柒
-6652253865814.708 → 負陸兆陸仟伍佰貳拾貳億伍仟參佰捌拾陸萬伍仟捌佰壹拾肆點柒零捌