Mercurial > audlegacy
view Plugins/General/scrobbler/tags/unicode.c @ 1176:6549a4c58e15 trunk
[svn] - grr
author | nenolod |
---|---|
date | Sun, 11 Jun 2006 20:29:00 -0700 |
parents | 4be4d74db123 |
children | 62726fb1cb3b |
line wrap: on
line source
/* * libmetatag - A media file tag-reader library * Copyright (C) 2003, 2004 Pipian * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include <stdlib.h> #include <wchar.h> #include <string.h> #include "include/endian.h" #include "include/unicode.h" #include "audacious/util.h" wchar_t *utf8_to_wchar(unsigned char *utf, size_t memsize) { size_t i; int j = 0; wchar_t *mem; mem = calloc(sizeof(wchar_t) * (memsize + 1), 1); for(i = 0; i < memsize;) { if(utf[i] < 0x80) mem[j++] = utf[i++]; else if(utf[i] < 0xE0) { mem[j++] = ((utf[i] & 0x1F) << 6) | (utf[i + 1] & 0x3F); i += 2; } else if(utf[i] < 0xF0) { mem[j++] = ((utf[i] & 0x0F) << 12) | ((utf[i + 1] & 0x3F) << 6) | (utf[i + 2] & 0x3F); i += 3; } else if(utf[i] < 0xF8) { mem[j++] = ((utf[i] & 0x07) << 18) | ((utf[i + 1] & 0x3F) << 12) | ((utf[i + 2] & 0x3F) << 6) | (utf[i + 2] & 0x3F); i += 4; } else if(utf[i] < 0xFC) { mem[j++] = ((utf[i] & 0x03) << 24) | ((utf[i + 1] & 0x3F) << 18) | ((utf[i + 2] & 0x3F) << 12) | ((utf[i + 3] & 0x3F) << 6) | (utf[i + 4] & 0x3F); i += 5; } else if(utf[i] >= 0xFC) { mem[j++] = ((utf[i] & 0x01) << 30) | ((utf[i + 1] & 0x3F) << 24) | ((utf[i + 2] & 0x3F) << 18) | ((utf[i + 3] & 0x3F) << 12) | ((utf[i + 4] & 0x3F) << 6) | (utf[i + 5] & 0x3F); i += 6; } } mem = realloc(mem, sizeof(wchar_t) * (j + 1)); return mem; } unsigned char *wchar_to_utf8(wchar_t *wchar, size_t memsize) { size_t i; unsigned char *mem, *ptr; mem = calloc(memsize * 6 + 1, 1); ptr = mem; for(i = 0; i < memsize; i++) { if(wchar[i] < 0x80) { *ptr++ = wchar[i] & 0x7F; } else if(wchar[i] < 0x800) { *ptr++ = 0xC0 | ((wchar[i] >> 6) & 0x1F); *ptr++ = 0x80 | (wchar[i] & 0x3F); } else if(wchar[i] < 0x10000) { *ptr++ = 0xE0 | ((wchar[i] >> 12) & 0x0F); *ptr++ = 0x80 | ((wchar[i] >> 6) & 0x3F); *ptr++ = 0x80 | (wchar[i] & 0x3F); } else if(wchar[i] < 0x200000) { *ptr++ = 0xF0 | ((wchar[i] >> 18) & 0x07); *ptr++ = 0x80 | ((wchar[i] >> 12) & 0x3F); *ptr++ = 0x80 | ((wchar[i] >> 6) & 0x3F); *ptr++ = 0x80 | (wchar[i] & 0x3F); } else if(wchar[i] < 0x4000000) { *ptr++ = 0xF8 | ((wchar[i] >> 24) & 0x03); *ptr++ = 0x80 | ((wchar[i] >> 18) & 0x3F); *ptr++ = 0x80 | ((wchar[i] >> 12) & 0x3F); *ptr++ = 0x80 | ((wchar[i] >> 6) & 0x3F); *ptr++ = 0x80 | (wchar[i] & 0x3F); } else if((unsigned long)wchar[i] < 0x80000000) { *ptr++ = 0xFC | ((wchar[i] >> 30) & 0x01); *ptr++ = 0x80 | ((wchar[i] >> 24) & 0x3F); *ptr++ = 0x80 | ((wchar[i] >> 18) & 0x3F); *ptr++ = 0x80 | ((wchar[i] >> 12) & 0x3F); *ptr++ = 0x80 | ((wchar[i] >> 6) & 0x3F); *ptr++ = 0x80 | (wchar[i] & 0x3F); } } mem = realloc(mem, ptr - mem + 1); return mem; } void iso88591_to_utf8(unsigned char *iso, size_t memsize, unsigned char **utf) { *utf = str_to_utf8(iso); } #if 0 void iso88591_to_utf8(unsigned char *iso, size_t memsize, unsigned char **utf) { size_t i; wchar_t *wchar; wchar = calloc(sizeof(wchar_t) * (memsize + 1), 1); for(i = 0; i < memsize; i++) wchar[i] = iso[i]; *utf = wchar_to_utf8(wchar, memsize); free(wchar); } #endif void utf16bom_to_utf8(unsigned char *utf16, size_t memsize, unsigned char **utf) { wchar_t *wchar; unsigned char utf16char[2]; int endian = 0; size_t i; wchar = calloc(sizeof(wchar_t) * memsize / 2 - 1, 1); for(i = 0; i < memsize; i += 2) { if(i == 0) { if(utf16[i] == 0xFF) endian = 0; else if(utf16[i] == 0xFE) endian = 1; } else { utf16char[0] = utf16[i]; utf16char[1] = utf16[i + 1]; if(endian == 1) wchar[i / 2 - 1] = be2short(utf16char); else if(endian == 0) wchar[i / 2 - 1] = le2short(utf16char); } } *utf = wchar_to_utf8(wchar, memsize / 2 - 1); free(wchar); } void utf16be_to_utf8(unsigned char *utf16, size_t memsize, unsigned char **utf) { wchar_t *wchar; unsigned char utf16char[2]; size_t i; wchar = calloc(sizeof(wchar_t) * (memsize / 2), 1); for(i = 0; i < memsize; i += 2) { utf16char[0] = utf16[i]; utf16char[1] = utf16[i + 1]; wchar[i / 2] = be2short(utf16char); } *utf = wchar_to_utf8(wchar, memsize / 2); free(wchar); } void utf16le_to_utf8(unsigned char *utf16, size_t memsize, unsigned char **utf) { wchar_t *wchar; unsigned char utf16char[2]; size_t i; wchar = calloc(sizeof(wchar_t) * (memsize / 2), 1); for(i = 0; i < memsize; i += 2) { utf16char[0] = utf16[i]; utf16char[1] = utf16[i + 1]; wchar[i / 2] = le2short(utf16char); } *utf = wchar_to_utf8(wchar, memsize / 2); free(wchar); }