00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifdef HAVE_CONFIG_H
00023 # include "config.h"
00024 #endif
00025
00026 #include <stdlib.h>
00027 #include <string.h>
00028 #include <stringprep.h>
00029 #include <punycode.h>
00030
00031 #include "idna.h"
00032
00033
00034 #include <c-strcase.h>
00035
/* Non-zero when code point C is one of the four characters treated as a
   label separator: U+002E, U+3002, U+FF0E or U+FF61.  The argument is
   evaluated more than once, so it must be free of side effects. */
#define DOTP(c)          \
  ((c) == 0x002E         \
   || (c) == 0x3002      \
   || (c) == 0xFF0E      \
   || (c) == 0xFF61)
00038
00039
00040
00072 int
00073 idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
00074 {
00075 size_t len, outlen;
00076 uint32_t *src;
00077 int rc;
00078
00079
00080
00081
00082
00083
00084
00085
00086 {
00087 size_t i;
00088 int inasciirange;
00089
00090 inasciirange = 1;
00091 for (i = 0; i < inlen; i++)
00092 if (in[i] > 0x7F)
00093 inasciirange = 0;
00094 if (inasciirange)
00095 {
00096 src = malloc (sizeof (in[0]) * (inlen + 1));
00097 if (src == NULL)
00098 return IDNA_MALLOC_ERROR;
00099
00100 memcpy (src, in, sizeof (in[0]) * inlen);
00101 src[inlen] = 0;
00102
00103 goto step3;
00104 }
00105 }
00106
00107
00108
00109
00110
00111
00112 {
00113 char *p;
00114
00115 p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
00116 if (p == NULL)
00117 return IDNA_MALLOC_ERROR;
00118
00119 len = strlen (p);
00120 do
00121 {
00122 char *newp;
00123
00124 len = 2 * len + 10;
00125 newp = realloc (p, len);
00126 if (newp == NULL)
00127 {
00128 free (p);
00129 return IDNA_MALLOC_ERROR;
00130 }
00131 p = newp;
00132
00133 if (flags & IDNA_ALLOW_UNASSIGNED)
00134 rc = stringprep_nameprep (p, len);
00135 else
00136 rc = stringprep_nameprep_no_unassigned (p, len);
00137 }
00138 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
00139
00140 if (rc != STRINGPREP_OK)
00141 {
00142 free (p);
00143 return IDNA_STRINGPREP_ERROR;
00144 }
00145
00146 src = stringprep_utf8_to_ucs4 (p, -1, NULL);
00147
00148 free (p);
00149 }
00150
00151 step3:
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163 if (flags & IDNA_USE_STD3_ASCII_RULES)
00164 {
00165 size_t i;
00166
00167 for (i = 0; src[i]; i++)
00168 if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
00169 (src[i] >= 0x3A && src[i] <= 0x40) ||
00170 (src[i] >= 0x5B && src[i] <= 0x60) ||
00171 (src[i] >= 0x7B && src[i] <= 0x7F))
00172 {
00173 free (src);
00174 return IDNA_CONTAINS_NON_LDH;
00175 }
00176
00177 if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
00178 {
00179 free (src);
00180 return IDNA_CONTAINS_MINUS;
00181 }
00182 }
00183
00184
00185
00186
00187
00188
00189 {
00190 size_t i;
00191 int inasciirange;
00192
00193 inasciirange = 1;
00194 for (i = 0; src[i]; i++)
00195 {
00196 if (src[i] > 0x7F)
00197 inasciirange = 0;
00198
00199 if (i < 64)
00200 out[i] = src[i];
00201 }
00202 if (i < 64)
00203 out[i] = '\0';
00204 if (inasciirange)
00205 goto step8;
00206 }
00207
00208
00209
00210
00211
00212
00213 {
00214 size_t i;
00215 int match;
00216
00217 match = 1;
00218 for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
00219 if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
00220 match = 0;
00221 if (match)
00222 {
00223 free (src);
00224 return IDNA_CONTAINS_ACE_PREFIX;
00225 }
00226 }
00227
00228
00229
00230
00231
00232 for (len = 0; src[len]; len++)
00233 ;
00234 src[len] = '\0';
00235 outlen = 63 - strlen (IDNA_ACE_PREFIX);
00236 rc = punycode_encode (len, src, NULL,
00237 &outlen, &out[strlen (IDNA_ACE_PREFIX)]);
00238 if (rc != PUNYCODE_SUCCESS)
00239 {
00240 free (src);
00241 return IDNA_PUNYCODE_ERROR;
00242 }
00243 out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';
00244
00245
00246
00247
00248
00249 memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));
00250
00251
00252
00253
00254
00255
00256 step8:
00257 free (src);
00258 if (strlen (out) < 1 || strlen (out) > 63)
00259 return IDNA_INVALID_LENGTH;
00260
00261 return IDNA_SUCCESS;
00262 }
00263
00264
00265 static int
00266 idna_to_unicode_internal (char *utf8in,
00267 uint32_t * out, size_t * outlen, int flags)
00268 {
00269 int rc;
00270 char tmpout[64];
00271 size_t utf8len = strlen (utf8in) + 1;
00272 size_t addlen = 0;
00273
00274
00275
00276
00277
00278
00279
00280
00281 {
00282 size_t i;
00283 int inasciirange;
00284
00285 inasciirange = 1;
00286 for (i = 0; utf8in[i]; i++)
00287 if (utf8in[i] & ~0x7F)
00288 inasciirange = 0;
00289 if (inasciirange)
00290 goto step3;
00291 }
00292
00293
00294
00295
00296
00297
00298
00299 do
00300 {
00301 char *newp = realloc (utf8in, utf8len + addlen);
00302 if (newp == NULL)
00303 {
00304 free (utf8in);
00305 return IDNA_MALLOC_ERROR;
00306 }
00307 utf8in = newp;
00308 if (flags & IDNA_ALLOW_UNASSIGNED)
00309 rc = stringprep_nameprep (utf8in, utf8len + addlen);
00310 else
00311 rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
00312 addlen += 1;
00313 }
00314 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
00315
00316 if (rc != STRINGPREP_OK)
00317 {
00318 free (utf8in);
00319 return IDNA_STRINGPREP_ERROR;
00320 }
00321
00322
00323
00324
00325
00326 step3:
00327 if (memcmp (IDNA_ACE_PREFIX, utf8in, strlen (IDNA_ACE_PREFIX)) != 0)
00328 {
00329 free (utf8in);
00330 return IDNA_NO_ACE_PREFIX;
00331 }
00332
00333
00334
00335
00336 memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)],
00337 strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1);
00338
00339
00340
00341
00342
00343
00344 (*outlen)--;
00345
00346 rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
00347 if (rc != PUNYCODE_SUCCESS)
00348 {
00349 free (utf8in);
00350 return IDNA_PUNYCODE_ERROR;
00351 }
00352
00353 out[*outlen] = 0;
00354
00355
00356
00357
00358 rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
00359 if (rc != IDNA_SUCCESS)
00360 {
00361 free (utf8in);
00362 return rc;
00363 }
00364
00365
00366
00367
00368
00369 if (c_strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0)
00370 {
00371 free (utf8in);
00372 return IDNA_ROUNDTRIP_VERIFY_ERROR;
00373 }
00374
00375
00376
00377
00378 free (utf8in);
00379 return IDNA_SUCCESS;
00380 }
00381
00417 int
00418 idna_to_unicode_44i (const uint32_t * in, size_t inlen,
00419 uint32_t * out, size_t * outlen, int flags)
00420 {
00421 int rc;
00422 size_t outlensave = *outlen;
00423 char *p;
00424
00425 p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
00426 if (p == NULL)
00427 return IDNA_MALLOC_ERROR;
00428
00429 rc = idna_to_unicode_internal (p, out, outlen, flags);
00430 if (rc != IDNA_SUCCESS)
00431 {
00432 memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
00433 inlen : outlensave));
00434 *outlen = inlen;
00435 }
00436
00437
00438
00439 return rc;
00440 }
00441
00442
00443
00457 int
00458 idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
00459 {
00460 const uint32_t *start = input;
00461 const uint32_t *end = input;
00462 char buf[64];
00463 char *out = NULL;
00464 int rc;
00465
00466
00467
00468
00469
00470
00471 if (input[0] == 0)
00472 {
00473
00474 *output = malloc (1);
00475 if (!*output)
00476 return IDNA_MALLOC_ERROR;
00477 strcpy (*output, "");
00478 return IDNA_SUCCESS;
00479 }
00480
00481 if (DOTP (input[0]) && input[1] == 0)
00482 {
00483
00484 *output = malloc (2);
00485 if (!*output)
00486 return IDNA_MALLOC_ERROR;
00487 strcpy (*output, ".");
00488 return IDNA_SUCCESS;
00489 }
00490
00491 *output = NULL;
00492 do
00493 {
00494 end = start;
00495
00496 for (; *end && !DOTP (*end); end++)
00497 ;
00498
00499 if (*end == '\0' && start == end)
00500 {
00501
00502 buf[0] = '\0';
00503 }
00504 else
00505 {
00506 rc = idna_to_ascii_4i (start, (size_t) (end - start), buf, flags);
00507 if (rc != IDNA_SUCCESS)
00508 return rc;
00509 }
00510
00511 if (out)
00512 {
00513 char *newp = realloc (out, strlen (out) + 1 + strlen (buf) + 1);
00514 if (!newp)
00515 {
00516 free (out);
00517 return IDNA_MALLOC_ERROR;
00518 }
00519 out = newp;
00520 strcat (out, ".");
00521 strcat (out, buf);
00522 }
00523 else
00524 {
00525 out = (char *) malloc (strlen (buf) + 1);
00526 if (!out)
00527 return IDNA_MALLOC_ERROR;
00528 strcpy (out, buf);
00529 }
00530
00531 start = end + 1;
00532 }
00533 while (*end);
00534
00535 *output = out;
00536
00537 return IDNA_SUCCESS;
00538 }
00539
00553 int
00554 idna_to_ascii_8z (const char *input, char **output, int flags)
00555 {
00556 uint32_t *ucs4;
00557 size_t ucs4len;
00558 int rc;
00559
00560 ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
00561 if (!ucs4)
00562 return IDNA_ICONV_ERROR;
00563
00564 rc = idna_to_ascii_4z (ucs4, output, flags);
00565
00566 free (ucs4);
00567
00568 return rc;
00569
00570 }
00571
00586 int
00587 idna_to_ascii_lz (const char *input, char **output, int flags)
00588 {
00589 char *utf8;
00590 int rc;
00591
00592 utf8 = stringprep_locale_to_utf8 (input);
00593 if (!utf8)
00594 return IDNA_ICONV_ERROR;
00595
00596 rc = idna_to_ascii_8z (utf8, output, flags);
00597
00598 free (utf8);
00599
00600 return rc;
00601 }
00602
00617 int
00618 idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags)
00619 {
00620 const uint32_t *start = input;
00621 const uint32_t *end = input;
00622 uint32_t *buf;
00623 size_t buflen;
00624 uint32_t *out = NULL;
00625 size_t outlen = 0;
00626 int rc;
00627
00628 *output = NULL;
00629
00630 do
00631 {
00632 end = start;
00633
00634 for (; *end && !DOTP (*end); end++)
00635 ;
00636
00637 buflen = (size_t) (end - start);
00638 buf = malloc (sizeof (buf[0]) * (buflen + 1));
00639 if (!buf)
00640 return IDNA_MALLOC_ERROR;
00641
00642 rc = idna_to_unicode_44i (start, (size_t) (end - start),
00643 buf, &buflen, flags);
00644
00645
00646 if (out)
00647 {
00648 uint32_t *newp = realloc (out,
00649 sizeof (out[0])
00650 * (outlen + 1 + buflen + 1));
00651 if (!newp)
00652 {
00653 free (buf);
00654 free (out);
00655 return IDNA_MALLOC_ERROR;
00656 }
00657 out = newp;
00658 out[outlen++] = 0x002E;
00659 memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
00660 outlen += buflen;
00661 out[outlen] = 0x0;
00662 free (buf);
00663 }
00664 else
00665 {
00666 out = buf;
00667 outlen = buflen;
00668 out[outlen] = 0x0;
00669 }
00670
00671 start = end + 1;
00672 }
00673 while (*end);
00674
00675 *output = out;
00676
00677 return IDNA_SUCCESS;
00678 }
00679
00694 int
00695 idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags)
00696 {
00697 uint32_t *ucs4;
00698 size_t ucs4len;
00699 int rc;
00700
00701 ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
00702 if (!ucs4)
00703 return IDNA_ICONV_ERROR;
00704
00705 rc = idna_to_unicode_4z4z (ucs4, output, flags);
00706 free (ucs4);
00707
00708 return rc;
00709 }
00710
00725 int
00726 idna_to_unicode_8z8z (const char *input, char **output, int flags)
00727 {
00728 uint32_t *ucs4;
00729 int rc;
00730
00731 rc = idna_to_unicode_8z4z (input, &ucs4, flags);
00732 *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
00733 free (ucs4);
00734
00735 if (!*output)
00736 return IDNA_ICONV_ERROR;
00737
00738 return rc;
00739 }
00740
00756 int
00757 idna_to_unicode_8zlz (const char *input, char **output, int flags)
00758 {
00759 char *utf8;
00760 int rc;
00761
00762 rc = idna_to_unicode_8z8z (input, &utf8, flags);
00763 *output = stringprep_utf8_to_locale (utf8);
00764 free (utf8);
00765
00766 if (!*output)
00767 return IDNA_ICONV_ERROR;
00768
00769 return rc;
00770 }
00771
00788 int
00789 idna_to_unicode_lzlz (const char *input, char **output, int flags)
00790 {
00791 char *utf8;
00792 int rc;
00793
00794 utf8 = stringprep_locale_to_utf8 (input);
00795 if (!utf8)
00796 return IDNA_ICONV_ERROR;
00797
00798 rc = idna_to_unicode_8zlz (utf8, output, flags);
00799 free (utf8);
00800
00801 return rc;
00802 }
00803