# Builds a UTF-8 continuation byte: the lowest 6 bits of +byte+
# with the top bit set (pattern 10xxxxxx), returned as a one-byte string.
def folgebyte(byte)
  payload = byte & 0x3F   # keep only the lowest 6 bits
  (0x80 | payload).chr    # set the top bit and convert to a string
end

# Encodes a single Unicode code point as UTF-8.
#
# @param unicode [Integer] code point of the character; must lie in
#   0..0x10FFFF and outside the surrogate range 0xD800..0xDFFF
# @return [String] string holding that one character, tagged as UTF-8
# @raise [RangeError] if the value cannot be encoded as UTF-8
def to_utf8(unicode)
  # UTF-8 has no representation for values above U+10FFFF or for UTF-16
  # surrogates; encoding them anyway would produce an invalid byte sequence.
  if unicode < 0 || unicode > 0x10FFFF || unicode.between?(0xD800, 0xDFFF)
    raise RangeError, "#{unicode} is not a valid Unicode code point"
  end

  # Continuation byte: marker bits 10 followed by the lowest 6 bits.
  continuation = ->(bits) { 0b1000_0000 | (bits & 0b11_1111) }

  bytes =
    if unicode < 0x80        # plain ASCII: 0xxxxxxx
      [unicode]
    elsif unicode < 0x800    # 110xxxxx 10xxxxxx
      [0b1100_0000 | ((unicode >> 6) & 0b1_1111),
       continuation.call(unicode)]
    elsif unicode < 0x10000  # 1110xxxx 10xxxxxx 10xxxxxx
      [0b1110_0000 | ((unicode >> 12) & 0b1111),
       continuation.call(unicode >> 6),
       continuation.call(unicode)]
    else                     # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      [0b1111_0000 | ((unicode >> 18) & 0b111),
       continuation.call(unicode >> 12),
       continuation.call(unicode >> 6),
       continuation.call(unicode)]
    end

  # pack('C*') yields a binary (ASCII-8BIT) string, so relabel it as UTF-8.
  bytes.pack('C*').force_encoding(Encoding::UTF_8)
end
 
# Input: string holding one UTF-8 encoded character
# Output: integer (the corresponding Unicode code point);
#         nil when the string has no bytes or more than four
def to_unicode(utf8)
  lead, *tail = utf8.bytes   # first byte and the bytes that follow it
  # Mask selecting the payload bits of the first byte, indexed by the
  # number of following bytes (0 following bytes: the byte is used as is).
  lead_masks = [0b1111_1111, 0b1_1111, 0b1111, 0b111]
  mask = lead_masks[tail.size]
  return nil unless lead && mask

  # Every following byte contributes its lowest 6 bits; shifting the
  # accumulated value left by 6 makes room for them.
  tail.inject(lead & mask) { |code, byte| (code << 6) | (byte & 0b11_1111) }
end
 
# A few checks: decode every character of a sample string and print the
# result next to the code point Ruby itself reports.
"Aö♣♥✓✌计算机科学𝅘𝅥𝅮".each_char do |char|
  expected = char.ord
  puts "Code von #{char} ist #{to_unicode(char)}  sollte sein: #{expected} = 0x#{expected.to_s(16)}"
end

# Round trip: encode some code points and decode the result again.
[0x261E, 0x2744, 0x1D120, 0x1D160, 0x24, 0xA5, 0x20AC].each do |code_point|
  encoded = to_utf8(code_point)
  puts "Unicode 0x#{code_point.to_s(16)}=#{code_point} : #{encoded}   und zurück: #{to_unicode(encoded)}"
end