efinf:blcks2017:bitsundbytes:utf8

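Vervollständigen Sie den folgenden Code: Die Funktion to_utf8(n) soll die UTF-8-Byte-Sequenz zum Unicode-Codepunkt n als String zurückgeben.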

utf8html.py
def to_utf8(n):
    # Exercise placeholder: generate the UTF-8 byte sequence for n here and
    # return it as a string (shownice() reads the result with ord(), one
    # character at a time).
 
def header():
    """Return the opening markup of the HTML test page.

    The markup declares the document charset as UTF-8 and leaves the
    <body> element open.
    """
    opening = (
        '<!DOCTYPE html>'
        '<html><head><meta charset="UTF-8"/></head>'
        '<body>'
    )
    return opening
 
def footer():
    """Return the closing </body> and </html> tags of the HTML test page."""
    closing_tags = ('</body>', '</html>')
    return ''.join(closing_tags)
 
def shownice(code, title=""):
    """Return an HTML fragment describing *code* and its UTF-8 encoding.

    The fragment starts with the string returned by to_utf8(code), followed
    by *title*, the code point in decimal, hex and binary, and then the
    ordinal of every character of the to_utf8() result, again in decimal,
    hex and binary.

    Args:
        code: integer code point; passed to to_utf8(), str(), hex(), bin().
        title: optional label inserted verbatim (not HTML-escaped).

    Returns:
        The assembled fragment as a single string.
    """
    utf8 = to_utf8(code)
    # Ordinals of the individual characters, computed once and reused for
    # all three number formats below.
    byte_values = [ord(ch) for ch in utf8]

    parts = [
        utf8,
        " &nbsp; &nbsp; ",
        title,
        " <b>Unicode</b> ",
        str(code),
        " = ",
        hex(code),
        " = ",
        bin(code),
        "  &nbsp; <b>UTF8-Codierung</b>: ",
    ]
    # Every value is followed by two spaces, including the last one.
    parts.extend(str(b) + "  " for b in byte_values)
    parts.append(" <b>hex</b>: ")
    parts.extend(hex(b) + "  " for b in byte_values)
    parts.append(" <b>binary</b>: ")
    parts.extend(bin(b) + "  " for b in byte_values)
    return "".join(parts)
 
 
# Write the test page: header, one demo line for U+260E, footer.
# The with-statement closes the file even if one of the writes raises.
# NOTE(review): the file name reads "uft8" - possibly a typo for "utf8";
# kept unchanged here.
# NOTE(review): the file is opened in text mode with the platform default
# encoding. Under Python 3 the characters returned by to_utf8() would be
# re-encoded on write; encoding="latin-1" would map each character to exactly
# one byte - confirm the target interpreter before changing this.
with open("uft8test.html", "w") as out:
    out.write(header())
    out.write(shownice(0x260e, "Telefon"))
    out.write(footer())

Lösungsvorschlag

Lösungsvorschlag

def to_utf8(n):
    """Return the UTF-8 encoding of code point *n* as a string.

    Every character of the result stands for one byte of the encoded
    sequence: its ordinal is the byte value (0..255).

    Args:
        n: integer code point in the range 0..0x10FFFF.

    Returns:
        A string of 1 to 4 characters, one per UTF-8 byte.

    Raises:
        ValueError: if n lies outside 0..0x10FFFF.
    """
    if n < 0 or n > 0x10ffff:
        raise ValueError("code point out of range: " + repr(n))
    if n < 0x80:
        # Up to 7 bits: 0xxxxxxx
        return chr(n)
    if n < 0x800:
        # Up to 11 bits (0x7FF included): 110xxxxx 10xxxxxx
        b1 = 0b11000000 | (n >> 6)
        b2 = 0b10000000 | (n & 0x3f)
        return chr(b1) + chr(b2)
    if n < 0x10000:
        # Up to 16 bits (0xFFFF included): 1110xxxx 10xxxxxx 10xxxxxx
        # Surrogate values 0xD800..0xDFFF are not treated specially here.
        b1 = 0b11100000 | (n >> 12)
        b2 = 0b10000000 | ((n >> 6) & 0x3f)
        b3 = 0b10000000 | (n & 0x3f)
        return chr(b1) + chr(b2) + chr(b3)
    # Up to 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    b1 = 0b11110000 | (n >> 18)
    b2 = 0b10000000 | ((n >> 12) & 0x3f)
    b3 = 0b10000000 | ((n >> 6) & 0x3f)
    b4 = 0b10000000 | (n & 0x3f)
    return chr(b1) + chr(b2) + chr(b3) + chr(b4)
  • efinf/blcks2017/bitsundbytes/utf8.txt
  • Last modified: 2017/11/21 09:58
  • by Ivo Blöchliger