Changeset 676 for trunk/ab5.0/ablib/src/Classes/System/Text
- Timestamp:
- Jan 13, 2009, 2:01:38 AM (15 years ago)
- Location:
- trunk/ab5.0/ablib/src/Classes/System/Text
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/ab5.0/ablib/src/Classes/System/Text/Encoding.ab
r655 r676 39 39 @brief 符号化して得られる文字列の長さを計算する。 40 40 @param[in] src 対象文字列 41 @param[in] srcCount srcの長さ 42 @return 符号化して得られる文字列の長さ 41 @param[in] srcCount srcの長さ(要素数単位) 42 @return 符号化して得られる文字列の長さ(要素数単位) 43 43 @date 2007/12/08 44 44 */ … … 55 55 @brief 符号化して得られる文字列の長さを計算する。 56 56 @param[in] src 対象文字列 57 @return 符号化して得られる文字列の長さ 57 @return 符号化して得られる文字列の長さ(要素数単位) 58 58 @date 2007/12/08 59 59 */ … … 66 66 #endif 67 67 68 Pr ivate68 Protected 69 69 /*! 70 70 @brief GetBytesCountの実装を行う。 71 71 @param[in] src 対象文字列 72 @param[in] srcCount srcの長さ 73 @return 符号化して得られる文字列の長さ 74 @date 2007/12/08 75 */ 76 Function GetBytesCountCore(src As *WCHAR, srcCount As Long) As Long 77 Dim enc = GetEncoder() 78 GetBytesCountCore = enc.GetBytesCount(src, srcCount, True) 79 End Function 72 @param[in] srcCount srcの長さ(要素数単位) 73 @return 符号化して得られる文字列の長さ(要素数単位) 74 @date 2007/12/08 75 */ 76 Abstract Function GetBytesCountCore(src As *WCHAR, srcCount As Long) As Long 80 77 Public 81 78 /*! 82 79 @brief 符号化する。 83 80 @param[in] src 入力 84 @param[in] srcCount srcの長さ 81 @param[in] srcCount srcの長さ(要素数単位) 85 82 @param[out] dst 出力 86 @param[in] dstCount dstのバッファの大きさ 87 @return dstに書き込まれた バイト数83 @param[in] dstCount dstのバッファの大きさ(要素数単位) 84 @return dstに書き込まれた要素数 88 85 @date 2007/12/08 89 86 */ … … 102 99 End Function 103 100 104 Pr ivate101 Protected 105 102 /*! 106 103 @brief GetBytesの処理を行う。 107 104 @param[in] src 入力 108 @param[in] srcCount srcの長さ 105 @param[in] srcCount srcの長さ(要素数単位) 109 106 @param[out] dst 出力 110 @param[in] dstCount dstのバッファの大きさ 111 @return dstに書き込まれた バイト数107 @param[in] dstCount dstのバッファの大きさ(要素数単位) 108 @return dstに書き込まれた要素数 112 109 @exception ArgumentException バッファの大きさが足りない 113 110 @date 2007/12/08 114 111 */ 115 Function GetBytesCore(src As *WCHAR, srcCount As Long, dst As *Byte, dstCount As Long) As Long 116 GetBytesCore = GetEncoder().GetBytes(src, srcCount, dst, dstCount, True) 117 End Function 112 Abstract Function GetBytesCore(src As *WCHAR, srcCount As Long, dst As *Byte, dstCount As Long) As Long 118 113 Public 119 114 /*! 120 115 @brief 復号して得られる文字列の長さを計算する。 121 116 @param[in] src 対象文字列 122 @param[in] srcCount srcの長さ 123 @return 復号して得られる文字列の長さ 117 @param[in] srcCount srcの長さ(要素数単位) 118 @return 復号して得られる文字列の長さ(要素数単位) 124 119 @date 2007/12/08 125 120 */ … … 133 128 End Function 134 129 135 Pr ivate130 Protected 136 131 /*! 137 132 @brief GetCharsCountの処理を行う。 138 133 @param[in] src 対象文字列 139 @param[in] srcCount srcの長さ 140 @return 符号化して得られる文字列の長さ 141 @date 2007/12/08 142 */ 143 Function GetCharsCountCore(src As *Byte, srcCount As Long) As Long 144 Dim dec = GetDecoder() 145 GetCharsCountCore = dec.GetCharsCount(src, srcCount, True) 146 End Function 134 @param[in] srcCount srcの長さ(要素数単位) 135 @return 符号化して得られる文字列の長さ(要素数単位) 136 @date 2007/12/08 137 */ 138 Abstract Function GetCharsCountCore(src As *Byte, srcCount As Long) As Long 147 139 Public 148 140 /*! 149 141 @brief 復号する。 150 142 @param[in] src 入力 151 @param[in] srcCount srcの長さ 143 @param[in] srcCount srcの長さ(要素数単位) 152 144 @param[out] dst 出力 153 @param[in] dstCount srcのバッファの大きさ 154 @return dstに書き込まれた バイト数145 @param[in] dstCount srcのバッファの大きさ(要素数単位) 146 @return dstに書き込まれた要素数 155 147 @date 2007/12/08 156 148 */ … … 169 161 End Function 170 162 171 Pr ivate163 Protected 172 164 /*! 173 165 @brief GetCharsの処理を行う。 174 166 @param[in] src 入力 175 @param[in] srcCount srcの長さ 167 @param[in] srcCount srcの長さ(要素数単位) 176 168 @param[out] dst 出力 177 @param[in] dstCount dstのバッファの大きさ 178 @return dstに書き込まれた バイト数169 @param[in] dstCount dstのバッファの大きさ(要素数単位) 170 @return dstに書き込まれた要素数 179 171 @exception ArgumentException バッファの大きさが足りない 180 172 @date 2007/12/08 181 173 */ 182 Function GetCharsCore(src As *Byte, srcCount As Long, dst As *WCHAR, dstCount As Long) As Long 183 GetCharsCore = GetDecoder().GetChars(src, srcCount, dst, dstCount, True) 184 End Function 174 Abstract Function GetCharsCore(src As *Byte, srcCount As Long, dst As *WCHAR, dstCount As Long) As Long 185 175 Public 186 176 #ifdef UNICODE … … 188 178 @brief 復号し、Stringで結果を返す。 189 179 @param[in] src 入力 190 @param[in] srcCount srcの長さ 180 @param[in] srcCount srcの長さ(要素数単位) 191 181 @return 変換結果の文字列 192 182 @date 2007/12/08 … … 214 204 Public 215 205 /*! 206 @brief 復号器を取得する。 207 */ 208 Abstract Function GetDecoder() As Decoder 209 210 /*! 216 211 @brief 符号器を取得する。 217 */218 Abstract Function GetDecoder() As Decoder219 220 /*!221 @brief 復号器を取得する。222 212 */ 223 213 Abstract Function GetEncoder() As Encoder … … 242 232 243 233 /*! 244 @brief GetPreambleの配列の要素数 234 @brief GetPreambleの配列の要素数。 245 235 */ 246 236 Virtual Function GetPreambleLength() As Long … … 248 238 End Function 249 239 240 Public 250 241 ' Abstract Function BodyName() As String 251 242 ' Abstract Function HeaderName() As String … … 333 324 /*! 334 325 @brief 復号を行うクラス 335 @date 200 7/12/19326 @date 2009/01/12 336 327 @auther Egtra 328 内部処理用 337 329 */ 338 330 Class Decoder 339 331 Public 340 /*! 341 @brief 変換する 342 @param[in] src 入力 343 @param[in] srcCount 入力要素数 344 @param[out] dst 出力 345 @param[in] dstCount 出力要素数 346 @param[in] flush 終了後に内部状態を初期化するかどうか 347 @param[out] srcUsed 使用された入力の要素数 348 @param[out] dstUsed 出力の内、実際に書き込まれた要素数 349 @param[out] completed 入力の全ての文字が変換に使われたかどうか 350 */ 351 Sub Convert(src As *Byte, srcCount As Long, 352 dst As *WCHAR, dstCount As Long, flush As Boolean, 353 ByRef srcUsed As Long, ByRef dstUsed As Long, ByRef completed As Boolean) 354 355 If src = 0 And srcCount > 0 Then 356 Throw New ArgumentNullException("src") 357 ElseIf srcCount < 0 Then 358 Throw New ArgumentOutOfRangeException("srcCount") 359 ElseIf dst = 0 Then 360 Throw New ArgumentNullException("dst") 361 ElseIf dstCount < 0 Then 362 Throw New ArgumentOutOfRangeException("dstCount") 363 End If 364 ConvertCore(src, srcCount, dst, dstCount, flush, srcUsed, dstUsed, completed) 332 Function Decode(dst As Collections.Generic.List<WCHAR>, st As IO.Stream) As Boolean 333 Decode = DecodeImpl(dst, st) 334 End Function 335 336 Protected 337 Abstract Function DecodeImpl(dst As Collections.Generic.List<WCHAR>, s As IO.Stream) As Boolean 338 End Class 339 340 /*! 341 @brief 符号化を行うクラス 342 @date 2009/01/12 343 @auther Egtra 344 内部処理用 345 */ 346 Class Encoder 347 Public 348 Sub Encode(src As *WCHAR, size As SIZE_T, s As IO.Stream, last As Boolean) 349 EncodeImpl(src, size, s, last) 365 350 End Sub 366 351 367 /*! 368 @brief 変換する 369 @param[in] src 入力 370 @param[in] srcCount 入力要素数 371 @param[out] dst 出力 372 @param[in] dstCount 出力要素数 373 @param[in] flush 終了後に内部状態を初期化するかどうか 374 @return 出力の内、実際に書き込まれた要素数 375 */ 376 Function GetChars(src As *Byte, srcCount As Long, dst As *WCHAR, dstCount As Long, flush As Boolean) As Long 377 Dim srcUsed As Long 378 Dim completed As Boolean 379 Convert(src, srcCount, dst, dstCount, flush, srcUsed, GetChars, completed) 380 If srcUsed < srcCount Then 381 Throw New ArgumentException("src", "buffer is too small") 382 End If 383 End Function 384 385 /*! 386 @brief 変換すると何文字になるか数える 387 @param[in] src 入力 388 @param[in] srcCount 入力要素数 389 @param[in] flush 終了後に内部状態を初期化するとして計算するかどうか 390 @return 必要な文字数 391 */ 392 Function GetCharsCount(src As *Byte, srcCount As Long, flush As Boolean) As Long 393 If src = 0 Then 394 Throw New ArgumentNullException("src") 395 ElseIf srcCount < 0 Then 396 Throw New ArgumentOutOfRangeException("srcCount") 397 End If 398 GetCharsCountCore(src, srcCount, flush) 399 End Function 400 401 /*! 402 @brief 内部状態を初期状態に戻す 403 */ 404 Virtual Sub Reset() 405 End Sub 406 407 Protected 408 /*! 409 @brief 実際に変換する 410 @param[in] src 入力 411 @param[in] srcCount 入力要素数 412 @param[out] dst 出力 413 @param[in] dstCount 出力要素数 414 @param[in] flush 終了後に内部状態を初期化するかどうか 415 @param[out] dstUsed 使用された入力の要素数 416 @param[out] srcUsed 出力の内、実際に書き込まれた要素数 417 @param[out] completed 入力の全ての文字が変換に使われたかどうか 418 */ 419 Abstract Sub ConvertCore(src As *Byte, srcCount As Long, dst As *WCHAR, dstCount As Long, flush As Boolean, 420 ByRef srcUsed As Long, ByRef dstUsed As Long, ByRef completed As Boolean) 421 422 /*! 423 @brief 変換すると何文字になるか数える 424 @param[in] src 入力 425 @param[in] srcCount 入力要素数 426 @param[in] flush 終了後に内部状態を初期化するとして計算するかどうか 427 @return 必要な文字数 428 */ 429 Abstract Function GetCharsCountCore(src As *Byte, srcCount As Long, flush As Boolean) As Long 430 End Class 431 432 /*! 433 @brief 符号化を行うクラス 434 @date 2007/12/19 435 @auther Egtra 436 */ 437 Class Encoder 438 Public 439 /*! 440 @brief 変換する 441 @param[in] src 入力 442 @param[in] srcCount 入力要素数 443 @param[out] dst 出力 444 @param[in] dstCount 出力要素数 445 @param[in] flush 終了後に内部状態を初期化するかどうか 446 @param[out] srcUsed 使用された入力の要素数 447 @param[out] dstUsed 出力の内、実際に書き込まれた要素数 448 @param[out] completed 入力の全ての文字が変換に使われたかどうか 449 */ 450 Sub Convert(src As *WCHAR, srcCount As Long, 451 dst As *Byte, dstCount As Long, flush As Boolean, 452 ByRef srcUsed As Long, ByRef dstUsed As Long, ByRef completed As Boolean) 453 454 If src = 0 And srcCount > 0 Then 455 Throw New ArgumentNullException("src") 456 ElseIf srcCount < 0 Then 457 Throw New ArgumentOutOfRangeException("srcCount") 458 ElseIf dst = 0 Then 459 Throw New ArgumentNullException("dst") 460 ElseIf dstCount < 0 Then 461 Throw New ArgumentOutOfRangeException("dstCount") 462 End If 463 ConvertCore(src, srcCount, dst, dstCount, flush, srcUsed, dstUsed, completed) 464 End Sub 465 466 /*! 467 @brief 変換する 468 @param[in] src 入力 469 @param[in] srcCount 入力要素数 470 @param[out] dst 出力 471 @param[in] dstCount 出力要素数 472 @param[in] flush 終了後に内部状態を初期化するかどうか 473 @return 出力の内、実際に書き込まれた要素数 474 */ 475 Function GetBytes(src As *WCHAR, srcCount As Long, dst As *Byte, dstCount As Long, flush As Boolean) As Long 476 Dim srcUsed As Long 477 Dim completed As Boolean 478 Convert(src, srcCount, dst, dstCount, flush, srcUsed, GetBytes, completed) 479 If srcUsed < srcCount Then 480 Throw New ArgumentException("src", "buffer is too small") 481 End If 482 End Function 483 484 /*! 485 @brief 変換すると何文字になるか数える 486 @param[in] src 入力 487 @param[in] srcCount 入力要素数 488 @param[in] flush 終了後に内部状態を初期化するとして計算するかどうか 489 @return 必要な文字数 490 */ 491 Function GetBytesCount(src As *WCHAR, srcCount As Long, flush As Boolean) As Long 492 If src = 0 Then 493 Throw New ArgumentNullException("src") 494 ElseIf srcCount < 0 Then 495 Throw New ArgumentOutOfRangeException("srcCount") 496 End If 497 GetBytesCountCore(src, srcCount, flush) 498 End Function 499 500 /*! 501 @brief 内部状態を初期状態に戻す 502 */ 503 Virtual Sub Reset() 504 End Sub 505 506 Protected 507 /*! 508 @brief 実際に変換する 509 @param[in] src 入力 510 @param[in] srcCount 入力要素数 511 @param[out] dst 出力 512 @param[in] dstCount 出力要素数 513 @param[in] flush 終了後に内部状態を初期化するかどうか 514 @param[out] dstUsed 使用された入力の要素数 515 @param[out] srcUsed 出力の内、実際に書き込まれた要素数 516 @param[out] completed 入力の全ての文字が変換に使われたかどうか 517 */ 518 Abstract Sub ConvertCore(src As *WCHAR, srcCount As Long, dst As *Byte, dstCount As Long, flush As Boolean, 519 ByRef dstUsed As Long, ByRef srcUsed As Long, ByRef completed As Boolean) 520 521 /*! 522 @brief 変換すると何文字になるか数える 523 @param[in] src 入力 524 @param[in] srcCount 入力要素数 525 @param[in] flush 終了後に内部状態を初期化するとして計算するかどうか 526 @return 必要な文字数 527 */ 528 Abstract Function GetBytesCountCore(src As *WCHAR, srcCount As Long, flush As Boolean) As Long 352 Protected 353 Abstract Sub EncodeImpl(src As *WCHAR, size As SIZE_T, s As IO.Stream, last As Boolean) 529 354 End Class 530 355 … … 549 374 End Sub 550 375 551 /*!552 @brief 符号器を取得する。553 */554 Override Function GetDecoder() As Decoder555 GetDecoder = New WindowsCodePageDecoder(cp)556 End Function557 558 /*!559 @brief 復号器を取得する。560 */561 Override Function GetEncoder() As Encoder562 GetEncoder = New WindowsCodePageEncoder(cp)563 End Function564 565 376 Override Function GetHashCode() As Long 566 377 GetHashCode = cp As Long … … 576 387 End Function 577 388 389 Override Function GetDecoder() As Decoder 390 GetDecoder = New WindowsCodePageDecoder(cp) 391 End Function 392 393 Override Function GetEncoder() As Encoder 394 GetEncoder = New WindowsCodePageEncoder(cp) 395 End Function 396 578 397 /*! 579 398 @brief ある長さの文字列を符号化して得られるバイト列の最大の長さを返す。 … … 588 407 Override Function GetMaxCharCount(srcCount As Long) As Long 589 408 GetMaxCharCount = srcCount 409 End Function 410 411 Protected 412 Override Function GetBytesCountCore(src As *WCHAR, srcCount As Long) As Long 413 GetBytesCountCore = WideCharToMultiByte(cp, 0, src, srcCount, 0, 0, 0, 0) 414 If srcCount <> 0 And GetBytesCountCore = 0 Then 415 ActiveBasic.Windows.ThrowWithLastError() 416 End If 417 End Function 418 419 Override Function GetBytesCore(src As *WCHAR, srcCount As Long, dst As *Byte, dstCount As Long) As Long 420 GetBytesCore = WideCharToMultiByte(cp, 0, src, srcCount, dst As PCSTR, dstCount, 0, 0) 421 If srcCount <> 0 And GetBytesCore = 0 Then 422 ActiveBasic.Windows.ThrowWithLastError() 423 End If 424 End Function 425 426 Override Function GetCharsCountCore(src As *Byte, srcCount As Long) As Long 427 GetCharsCountCore = MultiByteToWideChar(cp, 0, src As PCSTR, srcCount, 0, 0) 428 If srcCount <> 0 And GetCharsCountCore = 0 Then 429 ActiveBasic.Windows.ThrowWithLastError() 430 End If 431 End Function 432 433 Override Function GetCharsCore(src As *Byte, srcCount As Long, dst As *WCHAR, dstCount As Long) As Long 434 GetCharsCore = MultiByteToWideChar(cp, 0, src As PCSTR, srcCount, dst, dstCount) 435 If srcCount <> 0 And GetCharsCore = 0 Then 436 ActiveBasic.Windows.ThrowWithLastError() 437 End If 590 438 End Function 591 439 … … 594 442 End Class 595 443 444 /*! 445 @brief WideCharToMultiByteで復号化を行うクラス 446 @date 2009/01/12 447 @auther Egtra 448 内部処理用 449 */ 596 450 Class WindowsCodePageEncoder 597 451 Inherits Encoder … … 601 455 End Sub 602 456 603 Override Sub Reset() 604 nextByte = 0 457 Protected 458 Override Sub EncodeImpl(src As *WCHAR, size As SIZE_T, s As IO.Stream, last As Boolean) 459 ' サロゲートペアや結合文字列 (Combining Character Sequence)の途中でバッファが途切れている場合に対応する 460 ' ToDo: エラー処理 461 Dim mbLen = WideCharToMultiByte(cp, 0, src, size, 0, 0, 0, 0) 462 Dim mbBuf = GC_malloc_atomic(mbLen) 463 WideCharToMultiByte(cp, 0, src, size, mbBuf, mbLen, 0, 0) 464 s.Write(mbBuf, 0, mbLen) 605 465 End Sub 606 607 Protected608 Override Sub ConvertCore(src As *WCHAR, srcCount As Long, dst As *Byte, dstCount As Long, flush As Boolean,609 ByRef srcUsed As Long, ByRef dstUsed As Long, ByRef completed As Boolean)610 611 Dim srcPos = 0 As Long612 Dim dstPos = 0 As Long613 If dstCount > 0 And nextByte <> 0 Then614 dst[0] = nextByte615 nextByte = 0616 dstPos++617 End If618 While srcPos < srcCount And dstPos < srcCount619 Dim buf[1] As CHAR620 Dim len = WideCharToMultiByte(cp, WC_COMPOSITECHECK Or WC_DEFAULTCHAR, VarPtr(src[srcPos]), 1, buf, Len(buf), 0, 0)621 If len = 0 Then622 ActiveBasic.Windows.ThrowWithLastError()623 End If624 dst[dstPos] = buf[0] As Byte625 If len = 2 Then626 If dstCount = 1 Then627 nextByte = buf[1] As Byte628 Exit While629 End If630 dstPos++631 dst[dstPos] = buf[1] As Byte632 nextByte = 0633 End If634 srcPos++635 dstPos++636 Wend637 srcUsed = srcPos638 dstUsed = dstPos639 completed = (srcPos = srcCount And dstPos = srcCount And nextByte = 0)640 End Sub641 642 Override Function GetBytesCountCore(src As *WCHAR, srcCount As Long, flush As Boolean) As Long643 GetBytesCountCore = srcCount * 2 + 1 '暫定644 End Function645 466 646 467 Private 647 468 cp As DWord 648 nextByte As Byte649 469 End Class 650 470 471 /*! 472 @brief MultiByteToWideCharで復号化を行うクラス 473 @date 2009/01/12 474 @auther Egtra 475 内部処理用 476 */ 651 477 Class WindowsCodePageDecoder 652 478 Inherits Decoder … … 657 483 658 484 Protected 659 Override Sub ConvertCore(src As *Byte, srcCount As Long, dst As *WCHAR, dstCount As Long, flush As Boolean, 660 ByRef srcUsed As Long, ByRef dstUsed As Long, ByRef completed As Boolean) 661 662 Dim srcPos = 0 As Long 663 Dim dstPos = 0 As Long 664 If dstCount > 0 And nextByte <> 0 Then 485 Override Function DecodeImpl(dst As Collections.Generic.List<WCHAR>, s As IO.Stream) As Boolean 486 Dim i As Long 487 For i = 0 To DefalultDecodingBufferSize - 1 'ELM 488 Dim len As Long 665 489 Dim buf[1] As CHAR 666 buf[0] = nextByte As CHAR 667 buf[1] = src[1] As CHAR 668 Dim len = MultiByteToWideChar(cp, 0, buf, Len(buf), VarPtr(dst[dstPos]), 10) 669 If len = 0 Then 670 ActiveBasic.Windows.ThrowWithLastError() 490 Dim t = s.ReadByte() 491 If t = -1 Then 492 DecodeImpl = False 493 Exit Function 671 494 End If 672 srcPos++ 673 dstPos++ 674 nextByte = 0 675 End If 676 While srcPos < srcCount And dstPos < srcCount 677 Dim srcCharSize = 1 As Long 678 If IsDBCSLeadByteEx(cp, src[srcPos]) Then 679 srcCharSize = 2 680 If srcPos + 1 = srcCount Then 681 nextByte = src[srcPos] 682 Exit While 495 buf[0] = t As CHAR 496 If IsDBCSLeadByteEx(cp, buf[0]) Then 497 t = s.ReadByte() 498 If t = -1 Then 499 dst.Add(&hfffd As WCHAR) 500 DecodeImpl = False 501 Exit For 683 502 End If 503 buf[1] = t As CHAR 504 len = 2 505 Else 506 len = 1 684 507 End If 685 '将来的には行毎に変換しMB_USEGLYPHCHARSを使うようにしたい。 686 Dim len = MultiByteToWideChar(cp, 0, VarPtr(src[srcPos]) As *CHAR, srcCharSize, VarPtr(dst[dstPos]), 1) 687 If len = 0 Then 688 ActiveBasic.Windows.ThrowWithLastError() 508 Dim wc As WCHAR 509 If MultiByteToWideChar(cp, 0, buf, len, VarPtr(wc), 1) = 0 Then 510 dst.Add(&hFFFD As WCHAR) 511 Else 512 dst.Add(wc) 689 513 End If 690 srcPos += srcCharSize 691 dstPos++ 692 Wend 693 srcUsed = srcPos 694 dstUsed = dstPos 695 completed = (srcPos = srcCount And dstPos = srcCount And nextByte = 0) 696 End Sub 697 698 Override Function GetCharsCountCore(src As *Byte, srcCount As Long, flush As Boolean) As Long 699 GetCharsCountCore = srcCount + 1 '暫定 514 Next 515 DecodeImpl = True 700 516 End Function 701 517 702 518 Private 703 519 cp As DWord 704 nextByte As Byte705 520 End Class 521 522 Const DefalultDecodingBufferSize = 16384 706 523 707 524 End Namespace -
trunk/ab5.0/ablib/src/Classes/System/Text/UTF8Encoding.ab
r411 r676 12 12 Inherits Encoder 13 13 Protected 14 Override Sub ConvertCore(chars As *WCHAR, charCount As Long, bytes As *Byte, byteCount As Long, flush As Boolean, 15 ByRef bytesUsed As Long, ByRef charsUsed As Long, ByRef completed As Boolean) 16 17 Dim i As Long, j = 0 As Long 18 For i = 0 To ELM(charCount) 19 If chars[i] < &h80 Then 14 Override Sub EncodeImpl(src As *WCHAR, size As SIZE_T, s As IO.Stream, last As Boolean) 15 Dim i As Long 16 For i = 0 To ELM(size) 17 If buffer <> 0 Then 18 If _System_IsLowSurrogate(src[i]) Then 19 'UTF-16列からUnicodeコードポイントを復元 20 Dim c = (((buffer And &h3FF) As DWord << 10) Or (src[i] And &h3FF)) + &h10000 21 '4バイト変換 22 s.WriteByte(((c >> 18) Or &hf0) As Byte) 23 s.WriteByte(((c >> 12) And &h3F Or &h80) As Byte) 24 s.WriteByte(((c >> 6) And &h3F Or &h80) As Byte) 25 s.WriteByte((c And &h3F Or &h80) As Byte) 26 Else 27 writeReplacementChar(s) 28 End If 29 buffer = 0 30 ElseIf src[i] < &h80 Then 20 31 '1バイト変換 21 If j + 1 > byteCount Then 22 'バッファ不足 23 Goto *BufferOver 24 End If 25 bytes[j] = chars[i] As Byte 26 j++ 27 ElseIf chars[i] < &h800 Then 32 s.WriteByte(src[i] As Byte) 33 ElseIf src[i] < &h800 Then 28 34 '2バイト変換 29 If j + 2 > byteCount Then 30 Goto *BufferOver 31 End If 32 bytes[j] = ((chars[i] >> 6) Or &hC0) As Byte 33 j++ 34 bytes[j] = (chars[i] And &h3F Or &h80) As Byte 35 j++ 36 ElseIf _System_IsHighSurrogate(chars[i]) Then 37 If i + 1 >= charCount Then 38 'バッファに貯め込む 39 If flush = False Then 40 buffer = chars[i] 41 Exit Sub 42 End If 43 'ToDo: chars[i + 1]が範囲外になる場合が考慮されていない 44 ElseIf _System_IsLowSurrogate(chars[i + 1]) = False Then 45 'EncoderFallback 46 End If 47 If j + 4 > byteCount Then 48 Goto *BufferOver 49 End If 50 'UTF-16列からUnicodeコードポイントを復元 51 Dim c = (((chars[i] And &h3FF) As DWord << 10) Or (chars[i + 1] And &h3FF)) + &h10000 52 '4バイト変換 53 bytes[j] = ((c >> 18) Or &hf0) As Byte 54 j++ 55 bytes[j] = ((c >> 12) And &h3F Or &h80) As Byte 56 j++ 57 bytes[j] = ((c >> 6) And &h3F Or &h80) As Byte 58 j++ 59 bytes[j] = (c And &h3F Or &h80) As Byte 60 j++ 61 i++ 62 ElseIf _System_IsLowSurrogate(chars[i]) Then 63 'EncoderFallback 35 s.WriteByte(((src[i] >> 6) Or &hC0) As Byte) 36 s.WriteByte((src[i] And &h3F Or &h80) As Byte) 37 ElseIf _System_IsHighSurrogate(src[i]) Then 38 'バッファに貯め込む 39 buffer = src[i] 40 ElseIf _System_IsLowSurrogate(src[i]) Then 41 writeReplacementChar(s) 64 42 Else 65 43 '3バイト変換 66 If j + 3 > byteCount Then 67 Goto *BufferOver 68 End If 69 bytes[j] = ((chars[i] >> 12) Or &hE0) As Byte 70 j++ 71 bytes[j] = ((chars[i] >> 6) And &h3F Or &h80) As Byte 72 j++ 73 bytes[j] = (chars[i] And &h3F Or &h80) As Byte 74 j++ 44 s.WriteByte(((src[i] >> 12) Or &hE0) As Byte) 45 s.WriteByte(((src[i] >> 6) And &h3F Or &h80) As Byte) 46 s.WriteByte((src[i] And &h3F Or &h80) As Byte) 75 47 End If 76 48 Next 77 78 Exit Sub79 *BufferOver80 'バッファ不足81 Throw New ArgumentException("Buffer is not enough.", "bytes")82 49 End Sub 83 50 84 51 Private 52 ' U+FFFD Replacement CharacterのUTF-8表現、EF BF BDを書き込む。 53 Sub writeReplacementChar(s As IO.Stream) 54 Dim rc[2] = [&hef, &hbf, &hbd] As Byte 55 s.Write(rc, 0, Len(rc)) 56 End Sub 57 85 58 buffer As WCHAR 86 59 End Class … … 89 62 Inherits Decoder 90 63 Protected 91 Override Sub ConvertCore(bytes As *Byte, byteCount As Long, chars As *WCHAR, charCount As Long, flush As Boolean, 92 ByRef bytesUsed As Long, ByRef charsUsed As Long, ByRef completed As Boolean) 93 Dim i As Long, j = 0 As Long 94 For i = 0 To ELM(byteCount) 95 If state = 0 Then 96 If bytes[i] <= &h80 Then 64 Override Function DecodeImpl(dst As Collections.Generic.List<WCHAR>, s As IO.Stream) As Boolean 65 Dim i As Long 66 For i = 0 To DefalultDecodingBufferSize - 1 'ELM 67 Dim b = s.ReadByte() 68 If b = -1 Then 69 DecodeImpl = False 70 Exit Function 71 ElseIf state = 0 Then 72 If b <= &h80 Then 97 73 '1バイト変換 98 If j = charCount Then Goto *BufferOver 99 chars[j] = bytes[i] 100 j++ 101 ElseIf bytes[i] < &hC0 Then 102 'マルチバイトの2バイト目以降 103 'DecoderFallback完成までの暫定 104 If j = charCount Then Goto *BufferOver 105 chars[j] = &hfffd 106 j++ 107 ElseIf bytes[i] < &hD0 Then 74 dst.Add(b As WCHAR) 75 ElseIf b < &hC0 Then 76 '先頭バイトがなく、いきなりマルチバイトの2バイト目以降 77 dst.Add(&hFFFD As WCHAR) 78 ElseIf b < &hD0 Then 108 79 '2バイト文字の始まり 109 80 last = 2 110 buf = bytes[i] And &h3f111 state++ 112 ElseIf b ytes[i]< &hF0 Then81 buffer = b And &h3F 82 state++ 83 ElseIf b < &hF0 Then 113 84 '3バイト文字の始まり 114 85 last = 3 115 buf = bytes[i] And &h1f116 state++ 117 Else 86 buffer = b And &h1F 87 state++ 88 ElseIf b < &hF8 Then 118 89 '4バイト文字の始まり 119 90 last = 4 120 buf = bytes[i] And &h0f 121 state++ 91 buffer = b And &h0F 92 state++ 93 Else 94 '現在のUTF-8は4バイトを超える表現を認めていない。 95 dst.Add(&hFFFD As WCHAR) 122 96 End If 123 97 Else 124 If &h80 <= b ytes[i] And bytes[i]< &hC0 Then98 If &h80 <= b And b < &hC0 Then 125 99 'マルチバイト文字の2バイト目以降 126 buf <<= 6127 buf Or= bytes[i]And &h3F100 buffer <<= 6 101 buffer Or= b And &h3F 128 102 state++ 129 103 If state = last Then '最終バイトに到達 130 If state = 2 And buf >= &h80 Then 131 chars[j] = buf As WCHAR 132 j++ 133 ElseIf state = 3 And buf >= &h800 And buf < &hD800 And &hE0000 >= buf Then 134 chars[j] = buf As WCHAR 135 j++ 136 ElseIf state = 4 And buf <= &h10ffff Then 137 buf -= &h10000 138 chars[j] = (&hD800 Or (buf >> 10)) As WCHAR 139 j++ 140 chars[j] = (&hDC00 Or (buf And &h3FF)) As WCHAR 141 j++ 104 If state = 2 And buffer >= &h80 Then 105 dst.Add(buffer As WCHAR) 106 ElseIf state = 3 And buffer >= &h800 And buffer < &hD800 And &hE0000 >= buffer Then 107 dst.Add(buffer As WCHAR) 108 ElseIf state = 4 And buffer <= &h10FFFF Then 109 buffer -= &h10000 110 dst.Add((&hD800 Or (buffer >> 10)) As WCHAR) 111 dst.Add((&hDC00 Or (buffer And &h3FF)) As WCHAR) 142 112 Else 143 'DecoderFallback 144 If j = charCount Then Goto *BufferOver 145 chars[j] = &hfffd 146 j++ 113 '最短形式でないもの、4バイト形式で10FFFFを超えるコードポイントのもの 114 dst.Add(&hfffd As WCHAR) 147 115 End If 148 116 state = 0 149 117 End If 150 118 Else 151 '3, 4バイト文字の先頭 152 'DecoderFallback 153 If j = charCount Then Goto *BufferOver 154 chars[j] = &hfffd 155 j++ 119 'マルチバイト文字の途中なのに、それ以外のバイトが現れた場合 120 dst.Add(&hFFFD As WCHAR) 121 state = 0 156 122 End If 157 123 End If 158 124 Next 159 Exit Sub 160 *BufferOver 161 'バッファ不足 162 Throw New ArgumentException("Buffer is not enough.", "bytes") 163 End Sub 125 DecodeImpl = True 126 End Function 164 127 165 128 Private 166 buf As DWord129 buffer As DWord 167 130 state As Long 168 131 last As Long … … 180 143 Inherits Encoding 181 144 Public 182 183 145 Override Function Clone() As Object 184 Dim c = New UTF8Encoding 185 c.DecoderFallback = This.DecoderFallback 186 c.EncoderFallback = This.EncoderFallback 187 Return c 146 Clone = New UTF8Encoding 188 147 End Function 189 148 190 149 Override Function GetDecoder() As Decoder 191 150 GetDecoder = New Detail.UTF8Decoder 192 ' GetDecoder.Fallback = DecoderFallback193 151 End Function 194 152 195 153 Override Function GetEncoder() As Encoder 196 154 GetEncoder = New Detail.UTF8Encoder 197 ' GetEncoder.Fallback = EncoderFallback198 155 End Function 199 156 200 157 Override Function GetMaxByteCount(charCount As Long) As Long 201 ReturncharCount * 3158 GetMaxByteCount = charCount * 3 202 159 '全てがUTF-8で3バイトになる文字の場合が最大。 203 160 204 ' UTF-8で4バイトになる列は、UTF-16だとサロゲートペアで表現するので、205 '1単位あたりでは2バイトしか食わないことにな る。161 'なお、UTF-8で4バイトになる列は、UTF-16だとサロゲートペアで表現するので、 162 '1単位あたりでは2バイトしか食わないことになり、最大ではない。 206 163 End Function 207 164 208 165 Override Function GetMaxCharCount(byteCount As Long) As Long 209 166 '全てU+7F以下の文字だけだった場合 210 Return byteCount 211 End Function 212 Protected 213 Override Function GetBytesCountCore(s As *WCHAR, n As Long) As Long 214 End Function 215 216 Override Function GetBytesCore(chars As *WCHAR, charCount As Long, bytes As *Byte, byteCount As Long) As Long 217 End Function 218 219 Override Function GetCharsCountCore(s As *Byte, n As Long) As Long 220 End Function 221 222 Override Function GetCharsCore(bytes As *Byte, byteCount As Long, chars As *WCHAR, charCount As Long) As Long 223 End Function 224 Public 167 GetMaxCharCount = byteCount 168 End Function 169 225 170 Override Function GetPreamble() As *Byte 226 171 Return bom … … 230 175 Return Len(bom) 231 176 End Function 232 233 Override Function IsAlwaysNormalized() As Boolean 234 IsAlwaysNormalized = False 235 End Function 236 237 Override Function IsAlwaysNormalized(f As NormalizationForm) As Boolean 238 IsAlwaysNormalized = False 239 End Function 240 177 /* 241 178 Override Function BodyName() As String 242 179 Return "utf-8" … … 258 195 Return False 259 196 End Function 197 */ 198 Protected 199 Override Function GetBytesCountCore(src As *WCHAR, srcCount As Long) As Long 200 End Function 201 202 Override Function GetBytesCore(src As *WCHAR, srcCount As Long, dst As *Byte, dstCount As Long) As Long 203 End Function 204 205 Override Function GetCharsCountCore(src As *Byte, srcCount As Long) As Long 206 End Function 207 208 Override Function GetCharsCore(src As *Byte, srcCount As Long, dst As *WCHAR, dstCount As Long) As Long 209 End Function 210 260 211 Private 261 212 Static bom[2] = [&hEF, &hBB, &hBF] As Byte
Note:
See TracChangeset
for help on using the changeset viewer.