Mercurial > emacs
annotate admin/unidata/unidata-gen.el @ 112364:42e22c4f06b7
Move all DEFVAR'd globals into a structure -- threading infrastructure
* globals.h: New file.
* xterm.h (Vx_pixel_size_width_font_regexp): Remove declaration.
* window.h (Vinitial_window_system, Vminibuf_scroll_window)
(Vwindow_system_version): Remove declaration.
* w32term.h (Vw32_enable_palette)
(Vx_pixel_size_width_font_regexp): Remove declaration.
* w32menu.c (Voverriding_local_map)
(Voverriding_local_map_menu_flag): Remove declaration.
* w32inevt.c (Vw32_alt_is_meta, Vw32_apps_modifier)
(Vw32_capslock_is_shiftlock, Vw32_enable_caps_lock)
(Vw32_enable_num_lock, Vw32_lwindow_modifier)
(Vw32_pass_lwindow_to_system, Vw32_pass_rwindow_to_system)
(Vw32_phantom_key_code, Vw32_recognize_altgr)
(Vw32_rwindow_modifier, Vw32_scroll_lock_modifier)
(w32_use_full_screen_buffer): Remove declaration.
* w32.c (Vsystem_configuration, Vw32_downcase_file_names)
(Vw32_generate_fake_inodes, Vw32_get_true_file_attributes)
(w32_num_mouse_buttons, w32_pipe_read_delay): Remove declaration.
* termopts.h (Vtruncate_partial_width_windows, inverse_video)
(no_redraw_on_reenter, visible_bell): Remove declaration.
* sysdep.c (Vsystem_name): Remove declaration.
* syntax.h (parse_sexp_lookup_properties): Remove declaration.
* menu.h (Vmenu_updating_frame): Remove declaration.
* macros.h (Vexecuting_kbd_macro, executing_kbd_macro_index):
Remove declaration.
* lisp.h (Vafter_init_time, Vafter_load_alist)
(Vauto_save_list_file_name, Vbefore_init_time, Vcommand_history)
(Vcompletion_regexp_list, Vcurrent_load_list)
(Vcurrent_prefix_arg, Vdata_directory, Vdebug_on_error)
(Vdoc_directory, Vdoc_file_name, Vdynamic_library_alist)
(Vexec_directory, Vexec_path, Vexec_suffixes)
(Vface_font_rescale_alist, Vface_ignored_fonts, Vfeatures)
(Vhelp_form, Vhistory_length, Vinhibit_field_text_motion)
(Vinhibit_quit, Vinhibit_read_only, Vinhibit_redisplay)
(Vinstallation_directory, Vinvocation_directory)
(Vinvocation_name, Vload_file_rep_suffixes, Vload_history)
(Vload_suffixes, Vmark_even_if_inactive, Vmemory_full)
(Vmessage_log_max, Vobarray, Vprint_length, Vprint_level)
(Vpurify_flag, Vquit_flag, Vsaved_region_selection)
(Vscalable_fonts_allowed, Vselect_active_regions)
(Vshell_file_name, Vstandard_input, Vstandard_output)
(Vsystem_name, Vtemporary_file_directory, Vthrow_on_input)
(Vtop_level, Vtty_erase_char, Vundo_outer_limit)
(Vuser_login_name, Vwindow_scroll_functions)
(Vwindow_system_version, Vx_no_window_manager)
(Vx_resource_class, Vx_resource_name, baud_rate)
(completion_ignore_case, debug_on_next_call, gc_cons_threshold)
(history_delete_duplicates, inhibit_x_resources)
(last_nonmenu_event, load_in_progress, max_specpdl_size)
(minibuffer_auto_raise, print_escape_newlines, scroll_margin)
(use_dialog_box, use_file_dialog): Remove declaration. Include
globals.h.
* keymap.h (Voverriding_local_map)
(Voverriding_local_map_menu_flag, meta_prefix_char): Remove
declaration.
* keyboard.h (Vdouble_click_time, Vfunction_key_map)
(Vinput_method_function, Vkey_translation_map)
(Vlucid_menu_bar_dirty_flag, Vthis_original_command)
(do_mouse_tracking, extra_keyboard_modifiers)
(num_nonmacro_input_events): Remove declaration.
* intervals.h (Vchar_property_alias_alist)
(Vdefault_text_properties, Vinhibit_point_motion_hooks)
(Vtext_property_default_nonsticky): Remove declaration.
* gtkutil.h (x_gtk_file_dialog_help_text)
(x_gtk_show_hidden_files, x_gtk_use_old_file_dialog)
(x_gtk_whole_detached_tool_bar): Remove declaration.
* frame.h (Vdefault_frame_alist, Vframe_alpha_lower_limit)
(Vmenu_bar_mode, Vmouse_highlight, Vterminal_frame)
(Vtool_bar_mode, Vx_resource_class, Vx_resource_name)
(focus_follows_mouse): Remove declaration.
* fontset.h (Valternate_fontname_alist, Vfontset_alias_alist)
(Vignore_relative_composition, Votf_script_alist)
(Vuse_default_ascent, Vvertical_centering_font_regexp): Remove
declaration.
* font.h (Vfont_log): Remove declaration.
* dosfns.h (Vdos_display_scancodes, Vdos_version)
(Vdos_windows_version, dos_codepage, dos_country_code)
(dos_decimal_point, dos_hyper_key, dos_keyboard_layout)
(dos_keypad_mode, dos_super_key, dos_timezone_offset): Remove
declaration.
* disptab.h (Vglyph_table, Vstandard_display_table): Remove
declaration.
* dispextern.h (Vface_remapping_alist, Vglyphless_char_display)
(Vmouse_autoselect_window, Voverflow_newline_into_fringe)
(Vshow_trailing_whitespace, Vtool_bar_button_margin)
(Vtool_bar_style, cursor_in_echo_area, display_hourglass_p)
(inverse_video, mode_line_in_non_selected_windows)
(tool_bar_button_relief, tool_bar_max_label_size)
(underline_minimum_offset)
(unibyte_display_via_language_environment, x_stretch_cursor_p):
Remove declaration.
* composite.h (Vauto_composition_function)
(Vcomposition_function_table): Remove declaration.
* commands.h (Vexecuting_kbd_macro)
(Vminibuffer_local_completion_map)
(Vminibuffer_local_filename_completion_map)
(Vminibuffer_local_filename_must_match_map)
(Vminibuffer_local_map, Vminibuffer_local_must_match_map)
(Vminibuffer_local_ns_map, Vthis_command)
(Vunread_command_events, cursor_in_echo_area)
(last_command_event, last_nonmenu_event, unread_command_char):
Remove declaration.
* coding.h (Vcoding_system_for_read, Vcoding_system_for_write)
(Vdefault_file_name_coding_system)
(Vdefault_process_coding_system, Vfile_name_coding_system)
(Vlast_coding_system_used, Vlocale_coding_system)
(Vselect_safe_coding_system_function)
(Vtranslation_table_for_input, coding_system_require_warning)
(eol_mnemonic_dos, eol_mnemonic_mac, eol_mnemonic_undecided)
(eol_mnemonic_unix, inherit_process_coding_system): Remove
declaration.
* charset.h (Vcharset_list, Vcurrent_iso639_language): Remove
declaration.
* character.h (Vauto_fill_chars, Vchar_direction_table)
(Vchar_script_table, Vchar_width_table, Vprintable_chars)
(Vscript_representative_chars, Vtranslation_table_vector)
(Vunicode_category_table): Remove declaration.
* ccl.h (Vfont_ccl_encoder_alist): Remove declaration.
* buffer.h (Vafter_change_functions, Vbefore_change_functions)
(Vdeactivate_mark, Vfirst_change_hook, Vtransient_mark_mode)
(inhibit_modification_hooks): Remove declaration.
* xterm.c (syms_of_xterm): Update.
(Vx_alt_keysym, Vx_hyper_keysym, Vx_keysym_table)
(Vx_meta_keysym, Vx_super_keysym, Vx_toolkit_scroll_bars)
(x_mouse_click_focus_ignore_position)
(x_underline_at_descent_line)
(x_use_underline_position_properties): Remove.
* xsmfns.c (syms_of_xsmfns): Update.
(Vx_session_id, Vx_session_previous_id): Remove.
* xsettings.c (syms_of_xsettings): Update.
(Vxft_settings, use_system_font): Remove.
* xselect.c (syms_of_xselect): Update.
(Vselection_converter_alist, Vx_lost_selection_functions)
(Vx_sent_selection_functions, x_selection_timeout): Remove.
* xfns.c (syms_of_xfns): Update.
(Vgtk_version_string, Vmotif_version_string)
(Vx_cursor_fore_pixel, Vx_hourglass_pointer_shape)
(Vx_max_tooltip_size, Vx_mode_pointer_shape)
(Vx_no_window_manager, Vx_nontext_pointer_shape)
(Vx_pixel_size_width_font_regexp, Vx_pointer_shape)
(Vx_sensitive_text_pointer_shape)
(Vx_window_horizontal_drag_shape, x_gtk_file_dialog_help_text)
(x_gtk_show_hidden_files, x_gtk_use_old_file_dialog)
(x_gtk_use_system_tooltips, x_gtk_whole_detached_tool_bar):
Remove.
* xfaces.c (syms_of_xfaces): Update.
(Vface_default_stipple, Vface_font_rescale_alist)
(Vface_ignored_fonts, Vface_new_frame_defaults)
(Vface_remapping_alist, Vfont_list_limit)
(Vscalable_fonts_allowed, Vtty_defined_color_alist): Remove.
* xdisp.c (syms_of_xdisp): Update.
(Vauto_resize_tool_bars, Vblink_cursor_alist)
(Vdisplay_pixels_per_inch, Vfontification_functions)
(Vframe_title_format, Vglobal_mode_string)
(Vglyphless_char_display, Vhourglass_delay, Vhscroll_step)
(Vicon_title_format, Vinhibit_redisplay)
(Vline_number_display_limit, Vline_prefix)
(Vmax_mini_window_height, Vmenu_bar_update_hook)
(Vmenu_updating_frame, Vmessage_log_max)
(Vmouse_autoselect_window, Vnobreak_char_display)
(Voverlay_arrow_position, Voverlay_arrow_string)
(Voverlay_arrow_variable_list, Vredisplay_end_trigger_functions)
(Vresize_mini_windows, Vshow_trailing_whitespace)
(Vtool_bar_border, Vtool_bar_button_margin, Vtool_bar_style)
(Vtruncate_partial_width_windows, Vvoid_text_area_pointer)
(Vwindow_scroll_functions, Vwindow_size_change_functions)
(Vwindow_text_change_functions, Vwrap_prefix)
(auto_raise_tool_bar_buttons_p, automatic_hscrolling_p)
(debug_end_pos, display_hourglass_p, emacs_scroll_step)
(highlight_nonselected_windows, hscroll_margin)
(inhibit_eval_during_redisplay, inhibit_free_realized_faces)
(inhibit_menubar_update, inhibit_try_cursor_movement)
(inhibit_try_window_id, inhibit_try_window_reusing)
(line_number_display_limit_width)
(make_cursor_line_fully_visible_p, message_truncate_lines)
(mode_line_inverse_video, multiple_frames, overline_margin)
(scroll_conservatively, scroll_margin, tool_bar_button_relief)
(tool_bar_max_label_size, underline_minimum_offset)
(unibyte_display_via_language_environment, x_stretch_cursor_p):
Remove.
* window.c (syms_of_window): Update.
(Vminibuf_scroll_window, Vother_window_scroll_buffer)
(Vrecenter_redisplay, Vscroll_preserve_screen_position)
(Vtemp_buffer_show_function, Vwindow_configuration_change_hook)
(Vwindow_point_insertion_type, auto_window_vscroll_p)
(mode_line_in_non_selected_windows, next_screen_context_lines)
(window_min_height, window_min_width): Remove.
(scroll_margin): Remove declaration.
* w32term.c (syms_of_w32term): Update.
(Vw32_capslock_is_shiftlock, Vw32_grab_focus_on_raise)
(Vw32_recognize_altgr, Vw32_swap_mouse_buttons)
(Vx_toolkit_scroll_bars, w32_num_mouse_buttons)
(w32_use_visible_system_caret, x_underline_at_descent_line)
(x_use_underline_position_properties): Remove.
(Vcommand_line_args, Vsystem_name, extra_keyboard_modifiers):
Remove declaration.
* w32select.c (syms_of_w32select): Update.
(Vnext_selection_coding_system, Vselection_coding_system): Remove.
* w32proc.c (syms_of_ntproc): Update.
(Vw32_downcase_file_names, Vw32_generate_fake_inodes)
(Vw32_get_true_file_attributes, Vw32_quote_process_args)
(Vw32_start_process_inherit_error_mode)
(Vw32_start_process_share_console)
(Vw32_start_process_show_window, w32_pipe_read_delay): Remove.
(Vsystem_name): Remove declaration.
* w32font.c (syms_of_w32font): Update.
(Vw32_charset_info_alist): Remove.
* w32fns.c (globals_of_w32fns, syms_of_w32fns): Update.
(Vw32_alt_is_meta, Vw32_apps_modifier, Vw32_bdf_filename_alist)
(Vw32_color_map, Vw32_enable_caps_lock, Vw32_enable_num_lock)
(Vw32_enable_palette, Vw32_lwindow_modifier)
(Vw32_pass_alt_to_system, Vw32_pass_lwindow_to_system)
(Vw32_pass_rwindow_to_system, Vw32_phantom_key_code)
(Vw32_rwindow_modifier, Vw32_scroll_lock_modifier)
(Vx_cursor_fore_pixel, Vx_hourglass_pointer_shape)
(Vx_max_tooltip_size, Vx_mode_pointer_shape)
(Vx_no_window_manager, Vx_nontext_pointer_shape)
(Vx_pixel_size_width_font_regexp, Vx_pointer_shape)
(Vx_sensitive_text_pointer_shape)
(Vx_window_horizontal_drag_shape, w32_ansi_code_page)
(w32_enable_synthesized_fonts, w32_mouse_button_tolerance)
(w32_mouse_move_interval)
(w32_pass_extra_mouse_buttons_to_system)
(w32_pass_multimedia_buttons_to_system, w32_quit_key)
(w32_strict_fontnames, w32_strict_painting): Remove.
(Vhourglass_delay, Vmenu_bar_mode, Vtool_bar_mode)
(Vw32_recognize_altgr, Vwindow_system_version)
(w32_num_mouse_buttons, w32_use_visible_system_caret): Remove
declaration.
* w32console.c (syms_of_ntterm): Update.
(w32_use_full_screen_buffer): Remove.
(Vtty_defined_color_alist): Remove declaration.
* w16select.c (syms_of_win16select): Update.
(Vnext_selection_coding_system, Vselection_coding_system): Remove.
* undo.c (syms_of_undo): Update.
(Vundo_outer_limit, Vundo_outer_limit_function)
(undo_inhibit_record_point, undo_limit, undo_strong_limit):
Remove.
* textprop.c (syms_of_textprop): Update.
(Vchar_property_alias_alist, Vdefault_text_properties)
(Vinhibit_point_motion_hooks, Vtext_property_default_nonsticky):
Remove.
* terminal.c (syms_of_terminal): Update.
(Vdelete_terminal_functions, Vring_bell_function): Remove.
* term.c (syms_of_term): Update.
(Vresume_tty_functions, Vsuspend_tty_functions)
(no_redraw_on_reenter, system_uses_terminfo, visible_cursor):
Remove.
* syntax.c (syms_of_syntax): Update.
(Vfind_word_boundary_function_table, multibyte_syntax_as_symbol)
(open_paren_in_column_0_is_defun_start)
(parse_sexp_ignore_comments, parse_sexp_lookup_properties)
(words_include_escapes): Remove.
* search.c (syms_of_search): Update.
(Vinhibit_changing_match_data, Vsearch_spaces_regexp): Remove.
* process.c (syms_of_process): Update.
(Vprocess_adaptive_read_buffering, Vprocess_connection_type)
(delete_exited_processes): Remove.
* print.c (syms_of_print): Update.
(Vfloat_output_format, Vprint_charset_text_property)
(Vprint_circle, Vprint_continuous_numbering, Vprint_gensym)
(Vprint_length, Vprint_level, Vprint_number_table)
(Vstandard_output, print_escape_multibyte)
(print_escape_newlines, print_escape_nonascii, print_quoted):
Remove.
* msdos.c (syms_of_msdos): Update.
(Vdos_unsupported_char_glyph): Remove.
(unibyte_display_via_language_environment): Remove declaration.
* minibuf.c (syms_of_minibuf): Update.
(Vcompletion_regexp_list, Vhistory_add_new_input)
(Vhistory_length, Vminibuffer_completing_file_name)
(Vminibuffer_completion_confirm)
(Vminibuffer_completion_predicate, Vminibuffer_completion_table)
(Vminibuffer_exit_hook, Vminibuffer_help_form)
(Vminibuffer_history_position, Vminibuffer_history_variable)
(Vminibuffer_prompt_properties, Vminibuffer_setup_hook)
(Vread_buffer_function, Vread_expression_map)
(completion_ignore_case, enable_recursive_minibuffers)
(history_delete_duplicates, minibuffer_allow_text_properties)
(minibuffer_auto_raise, read_buffer_completion_ignore_case):
Remove.
* marker.c (syms_of_marker): Update.
(byte_debug_flag): Remove.
* macros.c (syms_of_macros): Update.
(Vexecuting_kbd_macro, executing_kbd_macro_index): Remove.
* lread.c (syms_of_lread): Update.
(Vafter_load_alist, Vbyte_boolean_vars)
(Vbytecomp_version_regexp, Vcurrent_load_list)
(Veval_buffer_list, Vload_file_name, Vload_file_rep_suffixes)
(Vload_history, Vload_path, Vload_read_function)
(Vload_source_file_function, Vload_suffixes, Vobarray)
(Vold_style_backquotes, Vpreloaded_file_list, Vread_circle)
(Vread_symbol_positions_list, Vread_with_symbol_positions)
(Vsource_directory, Vstandard_input, Vuser_init_file, Vvalues)
(force_load_messages, load_convert_to_unibyte)
(load_dangerous_libraries, load_force_doc_strings)
(load_in_progress): Remove.
* keymap.c (syms_of_keymap): Update.
(Vdefine_key_rebound_commands, Vemulation_mode_map_alists)
(Vminibuffer_local_completion_map)
(Vminibuffer_local_filename_completion_map)
(Vminibuffer_local_filename_must_match_map)
(Vminibuffer_local_map, Vminibuffer_local_must_match_map)
(Vminibuffer_local_ns_map, Vminor_mode_map_alist)
(Vminor_mode_overriding_map_alist, Vwhere_is_preferred_modifier):
Remove.
* keyboard.c (syms_of_keyboard): Update.
(Vauto_save_timeout, Vcommand_error_function)
(Vcommand_hook_internal, Vdeactivate_mark)
(Vdeferred_action_function, Vdeferred_action_list)
(Vdisable_point_adjustment, Vdouble_click_time)
(Vecho_keystrokes, Venable_disabled_menus_and_buttons)
(Vfunction_key_map, Vglobal_disable_point_adjustment)
(Vhelp_char, Vhelp_event_list, Vhelp_form)
(Vinput_method_function, Vinput_method_previous_message)
(Vkey_translation_map, Vlast_event_frame)
(Vlucid_menu_bar_dirty_flag, Vmenu_bar_final_items)
(Vminibuffer_message_timeout, Voverriding_local_map)
(Voverriding_local_map_menu_flag, Vpost_command_hook)
(Vpre_command_hook, Vprefix_help_command)
(Vsaved_region_selection, Vselect_active_regions)
(Vshow_help_function, Vspecial_event_map, Vsuggest_key_bindings)
(Vthis_command, Vthis_command_keys_shift_translated)
(Vthis_original_command, Vthrow_on_input, Vtimer_idle_list)
(Vtimer_list, Vtool_bar_separator_image_expression, Vtop_level)
(Vtty_erase_char, Vunread_command_events)
(Vunread_input_method_events, Vunread_post_input_method_events)
(auto_save_interval, cannot_suspend, do_mouse_tracking)
(double_click_fuzz, extra_keyboard_modifiers)
(inhibit_local_menu_bar_menus, last_command_event)
(last_input_event, last_nonmenu_event, menu_prompt_more_char)
(menu_prompting, meta_prefix_char, num_input_keys)
(num_nonmacro_input_events, polling_period, unread_command_char):
Remove.
* insdel.c (syms_of_insdel): Update.
(Vcombine_after_change_calls, check_markers_debug_flag): Remove.
* indent.c (syms_of_indent): Update.
(indent_tabs_mode): Remove.
* image.c (syms_of_image): Update.
(Vimage_cache_eviction_delay, Vimage_types)
(Vimagemagick_render_type, Vmax_image_size, Vx_bitmap_file_path)
(cross_disabled_images): Remove.
* fringe.c (syms_of_fringe): Update.
(Vfringe_bitmaps, Voverflow_newline_into_fringe): Remove.
* frame.c (syms_of_frame): Update.
(Vdefault_frame_alist, Vdefault_frame_scroll_bars)
(Vdelete_frame_functions, Vframe_alpha_lower_limit)
(Vmake_pointer_invisible, Vmenu_bar_mode, Vmouse_highlight)
(Vmouse_position_function, Vterminal_frame, Vtool_bar_mode)
(Vx_resource_class, Vx_resource_name, focus_follows_mouse):
Remove.
* fontset.c (syms_of_fontset): Update.
(Valternate_fontname_alist, Vfont_encoding_charset_alist)
(Vfontset_alias_alist, Vignore_relative_composition)
(Votf_script_alist, Vuse_default_ascent)
(Vvertical_centering_font_regexp): Remove.
* font.c (syms_of_font): Update.
(Vfont_encoding_alist, Vfont_log, Vfont_slant_table)
(Vfont_weight_table, Vfont_width_table): Remove.
* fns.c (syms_of_fns): Update.
(Vfeatures, use_dialog_box, use_file_dialog): Remove.
* filelock.c (syms_of_filelock): Update.
(Vtemporary_file_directory): Remove.
* fileio.c (syms_of_fileio): Update.
(Vafter_insert_file_functions, Vauto_save_include_big_deletions)
(Vauto_save_list_file_name, Vauto_save_visited_file_name)
(Vdefault_file_name_coding_system, Vfile_name_coding_system)
(Vfile_name_handler_alist, Vinhibit_file_name_handlers)
(Vinhibit_file_name_operation, Vset_auto_coding_function)
(Vwrite_region_annotate_functions)
(Vwrite_region_annotations_so_far)
(Vwrite_region_post_annotation_function)
(delete_by_moving_to_trash, write_region_inhibit_fsync): Remove.
(Vw32_get_true_file_attributes): Remove declaration.
* eval.c (syms_of_eval): Update.
(Vdebug_ignored_errors, Vdebug_on_error, Vdebug_on_signal)
(Vdebugger, Vinhibit_quit, Vmacro_declaration_function)
(Vquit_flag, Vsignal_hook_function, Vstack_trace_on_error)
(debug_on_next_call, debug_on_quit, debugger_may_continue)
(max_lisp_eval_depth, max_specpdl_size): Remove.
* emacs.c (syms_of_emacs): Update.
(Vafter_init_time, Vbefore_init_time, Vcommand_line_args)
(Vdynamic_library_alist, Vemacs_copyright, Vemacs_version)
(Vinstallation_directory, Vinvocation_directory)
(Vinvocation_name, Vkill_emacs_hook, Vpath_separator)
(Vprevious_system_messages_locale, Vprevious_system_time_locale)
(Vsystem_configuration, Vsystem_configuration_options)
(Vsystem_messages_locale, Vsystem_time_locale, Vsystem_type)
(inhibit_x_resources, noninteractive1): Remove.
* editfns.c (syms_of_editfns): Update.
(Vbuffer_access_fontified_property)
(Vbuffer_access_fontify_functions, Vinhibit_field_text_motion)
(Voperating_system_release, Vsystem_name, Vuser_full_name)
(Vuser_login_name, Vuser_real_login_name): Remove.
* dosfns.c (syms_of_dosfns): Update.
(Vdos_display_scancodes, Vdos_version, Vdos_windows_version)
(dos_codepage, dos_country_code, dos_decimal_point)
(dos_hyper_key, dos_keyboard_layout, dos_keypad_mode)
(dos_super_key, dos_timezone_offset): Remove.
* doc.c (syms_of_doc): Update.
(Vbuild_files, Vdoc_file_name): Remove.
* dispnew.c (syms_of_display): Update.
(Vglyph_table, Vinitial_window_system)
(Vredisplay_preemption_period, Vstandard_display_table)
(Vwindow_system_version, baud_rate, cursor_in_echo_area)
(inverse_video, redisplay_dont_pause, visible_bell): Remove.
* dired.c (syms_of_dired): Update.
(Vcompletion_ignored_extensions): Remove.
(Vw32_get_true_file_attributes): Remove declaration.
* dbusbind.c (syms_of_dbusbind): Update.
(Vdbus_debug, Vdbus_registered_buses)
(Vdbus_registered_objects_table): Remove.
* data.c (syms_of_data): Update.
(Vmost_negative_fixnum, Vmost_positive_fixnum): Remove.
* composite.c (syms_of_composite): Update.
(Vauto_composition_function, Vauto_composition_mode)
(Vcompose_chars_after_function, Vcomposition_function_table):
Remove.
* coding.c (syms_of_coding): Update.
(Vcharset_revision_table, Vcoding_category_list)
(Vcoding_system_alist, Vcoding_system_for_read)
(Vcoding_system_for_write, Vcoding_system_list)
(Vdefault_process_coding_system, Venable_character_translation)
(Vfile_coding_system_alist, Vlast_code_conversion_error)
(Vlast_coding_system_used, Vlatin_extra_code_table)
(Vlocale_coding_system, Vnetwork_coding_system_alist)
(Vprocess_coding_system_alist)
(Vselect_safe_coding_system_function)
(Vstandard_translation_table_for_decode)
(Vstandard_translation_table_for_encode)
(Vtranslation_table_for_input, coding_system_require_warning)
(eol_mnemonic_dos, eol_mnemonic_mac, eol_mnemonic_undecided)
(eol_mnemonic_unix, inherit_process_coding_system)
(inhibit_eol_conversion, inhibit_iso_escape_detection)
(inhibit_null_byte_detection): Remove.
* cmds.c (syms_of_cmds): Update.
(Vpost_self_insert_hook): Remove.
* charset.c (syms_of_charset): Update.
(Vcharset_list, Vcharset_map_path, Vcurrent_iso639_language)
(inhibit_load_charset_map): Remove.
* character.c (syms_of_character): Update.
(Vauto_fill_chars, Vchar_direction_table, Vchar_script_table)
(Vchar_width_table, Vprintable_chars)
(Vscript_representative_chars, Vtranslation_table_vector)
(Vunicode_category_table): Remove.
* ccl.c (syms_of_ccl): Update.
(Vcode_conversion_map_vector, Vfont_ccl_encoder_alist)
(Vtranslation_hash_table_vector): Remove.
* category.c (syms_of_category): Update.
(Vword_combining_categories, Vword_separating_categories): Remove.
* callproc.c (syms_of_callproc): Update.
(Vconfigure_info_directory, Vdata_directory, Vdoc_directory)
(Vexec_directory, Vexec_path, Vexec_suffixes)
(Vinitial_environment, Vprocess_environment)
(Vshared_game_score_directory, Vshell_file_name): Remove.
* callint.c (syms_of_callint): Update.
(Vcommand_debug_status, Vcommand_history, Vcurrent_prefix_arg)
(Vmark_even_if_inactive, Vmouse_leave_buffer_hook): Remove.
* bytecode.c (syms_of_bytecode): Update.
(Vbyte_code_meter, byte_metering_on): Remove.
* buffer.c (syms_of_buffer): Update.
(Vafter_change_functions, Vbefore_change_functions)
(Vchange_major_mode_hook, Vfirst_change_hook)
(Vinhibit_read_only, Vkill_buffer_query_functions)
(Vtransient_mark_mode, inhibit_modification_hooks): Remove.
* alloc.c (syms_of_alloc): Update.
(Vgc_cons_percentage, Vgc_elapsed, Vmemory_full)
(Vmemory_signal_data, Vpost_gc_hook, Vpurify_flag)
(cons_cells_consed, floats_consed, garbage_collection_messages)
(gc_cons_threshold, gcs_done, intervals_consed)
(misc_objects_consed, pure_bytes_used, string_chars_consed)
(strings_consed, symbols_consed, vector_cells_consed): Remove.
* lisp.h (DEFVAR_LISP, DEFVAR_LISP_NOPRO, DEFVAR_BOOL)
(DEFVAR_INT): Assume global is in `globals'.
* alloc.c (globals): Define.
author | Tom Tromey <tromey@redhat.com> |
---|---|
date | Tue, 18 Jan 2011 13:45:37 -0700 |
parents | 294500476da2 |
children |
rev | line source |
---|---|
90086 | 1 ;; unidata-gen.el -- Create files containing character property data. |
112218
376148b31b5e
Add 2011 to FSF/AIST copyright years.
Glenn Morris <rgm@gnu.org>
parents:
110987
diff
changeset
|
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
90086 | 3 ;; National Institute of Advanced Industrial Science and Technology (AIST) |
4 ;; Registration Number H13PRO009 | |
5 | |
6 ;; This file is part of GNU Emacs. | |
7 | |
94829
aeac1d771ae4
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
94071
diff
changeset
|
8 ;; GNU Emacs is free software: you can redistribute it and/or modify |
90086 | 9 ;; it under the terms of the GNU General Public License as published by |
94829
aeac1d771ae4
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
94071
diff
changeset
|
10 ;; the Free Software Foundation, either version 3 of the License, or |
aeac1d771ae4
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
94071
diff
changeset
|
11 ;; (at your option) any later version. |
90086 | 12 |
13 ;; GNU Emacs is distributed in the hope that it will be useful, | |
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 ;; GNU General Public License for more details. | |
17 | |
18 ;; You should have received a copy of the GNU General Public License | |
94829
aeac1d771ae4
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
94071
diff
changeset
|
19 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
90086 | 20 |
21 ;;; Commentary: | |
22 | |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
23 ;; SPECIAL NOTICE |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
24 ;; |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
25 ;; This file must be byte-compilable/loadable by `temacs' and also |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
26 ;; the entry function `unidata-gen-files' must be runnable by |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
27 ;; `temacs'. |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
28 |
90086 | 29 ;; FILES TO BE GENERATED |
30 ;; | |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
31 ;; The entry function `unidata-gen-files' generates these files in |
90086 | 32 ;; the current directory. |
33 ;; | |
34 ;; charprop.el | |
35 ;; It contains a series of forms of this format: | |
36 ;; (char-code-property-register PROP FILE) | |
37 ;; where PROP is a symbol representing a character property | |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
38 ;; (name, generic-category, etc), and FILE is a name of one of |
90086 | 39 ;; the following files. |
40 ;; | |
94071
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
41 ;; uni-name.el, uni-category.el, uni-combining.el, uni-bidi.el, |
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
42 ;; uni-decomposition.el, uni-decimal.el, uni-digit.el, uni-numeric.el, |
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
43 ;; uni-mirrored.el, uni-old-name.el, uni-comment.el, uni-uppercase.el, |
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
44 ;; uni-lowercase.el, uni-titlecase.el |
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
45 ;; They each contain a single form of this format: |
90086 | 46 ;; (char-code-property-register PROP CHAR-TABLE) |
47 ;; where PROP is the same as above, and CHAR-TABLE is a | |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
48 ;; char-table containing property values in a compressed format. |
90086 | 49 ;; |
50 ;; When they are installed in .../lisp/international/, the file | |
51 ;; "charprop.el" is preloaded in loadup.el. The other files are | |
52 ;; automatically loaded when the functions `get-char-code-property' | |
53 ;; and `put-char-code-property' are called. | |
54 ;; | |
55 ;; FORMAT OF A CHAR TABLE | |
56 ;; | |
57 ;; We want to make a file size containing a char-table small. We | |
58 ;; also want to load the file and get a property value fast. We | |
59 ;; also want to reduce the used memory after loading it. So, | |
60 ;; instead of naively storing a property value for each character in | |
61 ;; a char-table (and write it out into a file), we store compressed | |
62 ;; data in a char-table as below. | |
63 ;; | |
64 ;; If succeeding 128*N characters have the same property value, we | |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
65 ;; store that value for them. Otherwise, compress values for |
90086 | 66 ;; succeeding 128 characters into a single string and store it as a |
67 ;; value for those characters. The way of compression depends on a | |
68 ;; property. See the section "SIMPLE TABLE", "RUN-LENGTH TABLE", | |
69 ;; and "WORD-LIST TABLE". | |
70 | |
71 ;; The char table has four extra slots: | |
72 ;; 1st: property symbol | |
73 ;; 2nd: function to call to get a property value | |
74 ;; 3nd: function to call to put a property value | |
75 ;; 4th: function to call to get a description of a property value | |
76 ;; 5th: data referred by the above functions | |
77 | |
78 ;; List of elements of this form: | |
79 ;; (CHAR-or-RANGE PROP1 PROP2 ... PROPn) | |
80 ;; CHAR-or-RANGE: a character code or a cons of character codes | |
81 ;; PROPn: string representing the nth property value | |
82 | |
90174
f3973ae57d8b
(unidata-text-file): Delete it.
Kenichi Handa <handa@m17n.org>
parents:
90162
diff
changeset
|
83 (defvar unidata-list nil) |
f3973ae57d8b
(unidata-text-file): Delete it.
Kenichi Handa <handa@m17n.org>
parents:
90162
diff
changeset
|
84 |
f3973ae57d8b
(unidata-text-file): Delete it.
Kenichi Handa <handa@m17n.org>
parents:
90162
diff
changeset
|
85 (defun unidata-setup-list (unidata-text-file) |
90086 | 86 (let* ((table (list nil)) |
87 (tail table) | |
88 (block-names '(("^<CJK Ideograph" . CJK\ IDEOGRAPH) | |
89 ("^<Hangul Syllable" . HANGUL\ SYLLABLE) | |
90 ("^<.*Surrogate" . nil) | |
91 ("^<.*Private Use" . PRIVATE\ USE))) | |
92 val char name) | |
93 (or (file-readable-p unidata-text-file) | |
94 (error "File not readable: %s" unidata-text-file)) | |
95 (with-temp-buffer | |
105873
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
96 ;; Insert a file of this format: |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
97 ;; (CHAR NAME CATEGORY ...) |
110987
cda2045a5ee8
Fix typos in docstrings, comments and ChangeLogs.
Juanma Barranquero <lekktu@gmail.com>
parents:
106815
diff
changeset
|
98 ;; where CHAR is a character code, the following elements are strings |
105873
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
99 ;; representing character properties. |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
100 (insert-file-contents unidata-text-file) |
90086 | 101 (goto-char (point-min)) |
102 (condition-case nil | |
103 (while t | |
104 (setq val (read (current-buffer)) | |
105 char (car val) | |
106 name (cadr val)) | |
107 | |
108 ;; Check this kind of block. | |
109 ;; 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;; | |
105873
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
110 ;; 9FCB;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;; |
90086 | 111 (if (and (= (aref name 0) ?<) |
112 (string-match ", First>$" name)) | |
113 (let ((first char) | |
114 (l block-names) | |
115 block-name) | |
116 (setq val (read (current-buffer)) | |
117 char (car val) | |
118 block-name (cadr val) | |
119 name nil) | |
120 (while l | |
121 (if (string-match (caar l) block-name) | |
122 (setq name (cdar l) l nil) | |
123 (setq l (cdr l)))) | |
124 (if (not name) | |
125 ;; As this is a surrogate pair range, ignore it. | |
126 (setq val nil) | |
127 (setcar val (cons first char)) | |
128 (setcar (cdr val) name)))) | |
129 | |
130 (when val | |
131 (setcdr tail (list val)) | |
132 (setq tail (cdr tail)))) | |
133 (error nil))) | |
90174
f3973ae57d8b
(unidata-text-file): Delete it.
Kenichi Handa <handa@m17n.org>
parents:
90162
diff
changeset
|
134 (setq unidata-list (cdr table)))) |
90086 | 135 |
136 ;; Alist of this form: | |
137 ;; (PROP INDEX GENERATOR FILENAME) | |
138 ;; PROP: character property | |
139 ;; INDEX: index to each element of unidata-list for PROP | |
140 ;; GENERATOR: function to generate a char-table | |
141 ;; FILENAME: filename to store the char-table | |
142 ;; DESCRIBER: function to call to get a description string of property value | |
143 | |
144 (defconst unidata-prop-alist | |
145 '((name | |
146 1 unidata-gen-table-name "uni-name.el" | |
147 "Unicode character name. | |
148 Property value is a string.") | |
149 (general-category | |
150 2 unidata-gen-table-symbol "uni-category.el" | |
151 "Unicode general category. | |
152 Property value is one of the following symbols: | |
153 Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po, | |
154 Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn" | |
155 unidata-describe-general-category) | |
156 (canonical-combining-class | |
157 3 unidata-gen-table-integer "uni-combining.el" | |
158 "Unicode canonical combining class. | |
159 Property value is an integer." | |
160 unidata-describe-canonical-combining-class) | |
161 (bidi-class | |
162 4 unidata-gen-table-symbol "uni-bidi.el" | |
163 "Unicode bidi class. | |
164 Property value is one of the following symbols: | |
165 L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, | |
166 AN, CS, NSM, BN, B, S, WS, ON" | |
167 unidata-describe-bidi-class) | |
168 (decomposition | |
169 5 unidata-gen-table-decomposition "uni-decomposition.el" | |
170 "Unicode decomposition mapping. | |
171 Property value is a list of characters. The first element may be | |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
172 one of these symbols representing compatibility formatting tag: |
100107
eff1b0128211
(unidata-prop-alist): Docstring for
Kenichi Handa <handa@m17n.org>
parents:
100094
diff
changeset
|
173 font, noBreak, initial, medial, final, isolated, circle, super, |
eff1b0128211
(unidata-prop-alist): Docstring for
Kenichi Handa <handa@m17n.org>
parents:
100094
diff
changeset
|
174 sub, vertical, wide, narrow, small, square, fraction, compat" |
90086 | 175 unidata-describe-decomposition) |
176 (decimal-digit-value | |
177 6 unidata-gen-table-integer "uni-decimal.el" | |
178 "Unicode numeric value (decimal digit). | |
179 Property value is an integer.") | |
180 (digit-value | |
181 7 unidata-gen-table-integer "uni-digit.el" | |
182 "Unicode numeric value (digit). | |
183 Property value is an integer.") | |
184 (numeric-value | |
100093
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
185 8 unidata-gen-table-numeric "uni-numeric.el" |
90086 | 186 "Unicode numeric value (numeric). |
100094
c39e7dbb8896
(unidata-prop-alist): Docstring adjusted.
Kenichi Handa <handa@m17n.org>
parents:
100093
diff
changeset
|
187 Property value is an integer or a floating point.") |
90086 | 188 (mirrored |
189 9 unidata-gen-table-symbol "uni-mirrored.el" | |
190 "Unicode bidi mirrored flag. | |
191 Property value is a symbol `Y' or `N'.") | |
192 (old-name | |
193 10 unidata-gen-table-name "uni-old-name.el" | |
194 "Unicode old names as published in Unicode 1.0. | |
195 Property value is a string.") | |
196 (iso-10646-comment | |
197 11 unidata-gen-table-name "uni-comment.el" | |
198 "Unicode ISO 10646 comment. | |
199 Property value is a string.") | |
200 (uppercase | |
201 12 unidata-gen-table-character "uni-uppercase.el" | |
202 "Unicode simple uppercase mapping. | |
203 Property value is a character." | |
204 string) | |
205 (lowercase | |
206 13 unidata-gen-table-character "uni-lowercase.el" | |
207 "Unicode simple lowercase mapping. | |
208 Property value is a character." | |
209 string) | |
210 (titlecase | |
211 14 unidata-gen-table-character "uni-titlecase.el" | |
212 "Unicode simple titlecase mapping. | |
213 Property value is a character." | |
214 string))) | |
215 | |
216 ;; Functions to access the above data. | |
217 (defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist))) | |
218 (defsubst unidata-prop-generator (prop) (nth 2 (assq prop unidata-prop-alist))) | |
219 (defsubst unidata-prop-file (prop) (nth 3 (assq prop unidata-prop-alist))) | |
220 (defsubst unidata-prop-docstring (prop) (nth 4 (assq prop unidata-prop-alist))) | |
221 (defsubst unidata-prop-describer (prop) (nth 5 (assq prop unidata-prop-alist))) | |
222 | |
223 | |
224 ;; SIMPLE TABLE | |
225 ;; | |
226 ;; If the type of character property value is character, and the | |
227 ;; values of succeeding character codes are usually different, we use | |
228 ;; a char-table described here to store such values. | |
229 ;; | |
230 ;; If succeeding 128 characters has no property, a char-table has the | |
105873
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
231 ;; symbol t for them. Otherwise a char-table has a string of the |
90086 | 232 ;; following format for them. |
233 ;; | |
234 ;; The first character of the string is FIRST-INDEX. | |
235 ;; The Nth (N > 0) character of the string is a property value of the | |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
236 ;; character (BLOCK-HEAD + FIRST-INDEX + N - 1), where BLOCK-HEAD is |
90086 | 237 ;; the first of the characters in the block. |
238 ;; | |
239 ;; The 4th extra slot of a char-table is nil. | |
240 | |
241 (defun unidata-get-character (char val table) | |
242 (cond | |
243 ((characterp val) | |
244 val) | |
245 | |
246 ((stringp val) | |
247 (let* ((len (length val)) | |
248 (block-head (lsh (lsh char -7) 7)) | |
249 (vec (make-vector 128 nil)) | |
250 (first-index (aref val 0))) | |
251 (dotimes (i (1- len)) | |
252 (let ((elt (aref val (1+ i)))) | |
253 (if (> elt 0) | |
254 (aset vec (+ first-index i) elt)))) | |
255 (dotimes (i 128) | |
256 (aset table (+ block-head i) (aref vec i))) | |
257 (aref vec (- char block-head)))))) | |
258 | |
259 (defun unidata-put-character (char val table) | |
260 (or (characterp val) | |
261 (not val) | |
94071
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
262 (error "Not a character nor nil: %S" val)) |
90086 | 263 (let ((current-val (aref table char))) |
264 (unless (eq current-val val) | |
265 (if (stringp current-val) | |
266 (funcall (char-table-extra-slot table 1) char current-val table)) | |
267 (aset table char val)))) | |
268 | |
269 (defun unidata-gen-table-character (prop) | |
270 (let ((table (make-char-table 'char-code-property-table)) | |
271 (prop-idx (unidata-prop-index prop)) | |
272 (vec (make-vector 128 0)) | |
273 (tail unidata-list) | |
274 elt range val idx slot) | |
275 (set-char-table-range table (cons 0 (max-char)) t) | |
276 (while tail | |
277 (setq elt (car tail) tail (cdr tail)) | |
278 (setq range (car elt) | |
279 val (nth prop-idx elt)) | |
280 (if (= (length val) 0) | |
281 (setq val nil) | |
282 (setq val (string-to-number val 16))) | |
283 (if (consp range) | |
284 (if val | |
285 (set-char-table-range table range val)) | |
286 (let* ((start (lsh (lsh range -7) 7)) | |
287 (limit (+ start 127)) | |
288 first-index last-index) | |
289 (fillarray vec 0) | |
290 (if val | |
291 (aset vec (setq last-index (setq first-index (- range start))) | |
292 val)) | |
293 (while (and (setq elt (car tail) range (car elt)) | |
294 (integerp range) | |
295 (<= range limit)) | |
296 (setq val (nth prop-idx elt)) | |
297 (when (> (length val) 0) | |
298 (aset vec (setq last-index (- range start)) | |
299 (string-to-number val 16)) | |
300 (or first-index | |
301 (setq first-index last-index))) | |
302 (setq tail (cdr tail))) | |
303 (when first-index | |
304 (let ((str (string first-index)) | |
305 c) | |
306 (while (<= first-index last-index) | |
307 (setq str (format "%s%c" str (or (aref vec first-index) 0)) | |
308 first-index (1+ first-index))) | |
309 (set-char-table-range table (cons start limit) str)))))) | |
310 | |
311 (set-char-table-extra-slot table 0 prop) | |
312 (byte-compile 'unidata-get-character) | |
313 (byte-compile 'unidata-put-character) | |
314 (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-character)) | |
315 (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-character)) | |
316 | |
317 table)) | |
318 | |
319 | |
320 | |
321 ;; RUN-LENGTH TABLE | |
322 ;; | |
323 ;; If the type of character property value is symbol, integer, | |
324 ;; boolean, or character, we use a char-table described here to store | |
325 ;; the values. | |
326 ;; | |
327 ;; The 4th extra slot is a vector of property values (VAL-TABLE), and | |
328 ;; values for succeeding 128 characters are encoded into this | |
329 ;; character sequence: | |
330 ;; ( VAL-CODE RUN-LENGTH ? ) + | |
331 ;; where: | |
332 ;; VAL-CODE (0..127): | |
333 ;; (VAL-CODE - 1) is an index into VAL-TABLE. | |
334 ;; The value 0 means no-value. | |
335 ;; RUN-LENGTH (130..255): | |
336 ;; (RUN-LENGTH - 128) specifies how many characters have the same | |
337 ;; value. If omitted, it means 1. | |
338 | |
339 | |
340 ;; Return a symbol-type character property value of CHAR. VAL is the | |
341 ;; current value of (aref TABLE CHAR). | |
342 | |
343 (defun unidata-get-symbol (char val table) | |
344 (let ((val-table (char-table-extra-slot table 4))) | |
345 (cond ((symbolp val) | |
346 val) | |
347 ((stringp val) | |
348 (let ((first-char (lsh (lsh char -7) 7)) | |
349 (str val) | |
350 (len (length val)) | |
351 (idx 0) | |
352 this-val count) | |
353 (set-char-table-range table (cons first-char (+ first-char 127)) | |
354 nil) | |
355 (while (< idx len) | |
356 (setq val (aref str idx) idx (1+ idx) | |
357 count (if (< idx len) (aref str idx) 1)) | |
358 (setq val (and (> val 0) (aref val-table (1- val))) | |
359 count (if (< count 128) | |
360 1 | |
361 (prog1 (- count 128) (setq idx (1+ idx))))) | |
362 (dotimes (i count) | |
363 (if val | |
364 (aset table first-char val)) | |
365 (if (= first-char char) | |
366 (setq this-val val)) | |
367 (setq first-char (1+ first-char)))) | |
368 this-val)) | |
369 ((> val 0) | |
370 (aref val-table (1- val)))))) | |
94071
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
371 |
90086 | 372 ;; Return a integer-type character property value of CHAR. VAL is the |
373 ;; current value of (aref TABLE CHAR). | |
374 | |
375 (defun unidata-get-integer (char val table) | |
376 (let ((val-table (char-table-extra-slot table 4))) | |
377 (cond ((integerp val) | |
378 val) | |
379 ((stringp val) | |
380 (let ((first-char (lsh (lsh char -7) 7)) | |
381 (str val) | |
382 (len (length val)) | |
383 (idx 0) | |
384 this-val count) | |
385 (while (< idx len) | |
386 (setq val (aref str idx) idx (1+ idx) | |
387 count (if (< idx len) (aref str idx) 1)) | |
388 (setq val (and (> val 0) (aref val-table (1- val))) | |
389 count (if (< count 128) | |
390 1 | |
391 (prog1 (- count 128) (setq idx (1+ idx))))) | |
392 (dotimes (i count) | |
393 (aset table first-char val) | |
394 (if (= first-char char) | |
395 (setq this-val val)) | |
396 (setq first-char (1+ first-char)))) | |
397 this-val))))) | |
398 | |
100093
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
399 ;; Return a numeric-type (integer or float) character property value |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
400 ;; of CHAR. VAL is the current value of (aref TABLE CHAR). |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
401 |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
402 (defun unidata-get-numeric (char val table) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
403 (cond |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
404 ((numberp val) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
405 val) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
406 ((stringp val) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
407 (let ((val-table (char-table-extra-slot table 4)) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
408 (first-char (lsh (lsh char -7) 7)) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
409 (str val) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
410 (len (length val)) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
411 (idx 0) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
412 this-val count) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
413 (while (< idx len) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
414 (setq val (aref str idx) idx (1+ idx) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
415 count (if (< idx len) (aref str idx) 1)) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
416 (setq val (and (> val 0) (aref val-table (1- val))) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
417 count (if (< count 128) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
418 1 |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
419 (prog1 (- count 128) (setq idx (1+ idx))))) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
420 (dotimes (i count) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
421 (aset table first-char val) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
422 (if (= first-char char) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
423 (setq this-val val)) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
424 (setq first-char (1+ first-char)))) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
425 this-val)))) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
426 |
90086 | 427 ;; Store VAL (symbol) as a character property value of CHAR in TABLE. |
428 | |
429 (defun unidata-put-symbol (char val table) | |
430 (or (symbolp val) | |
431 (error "Not a symbol: %S" val)) | |
432 (let ((current-val (aref table char))) | |
433 (unless (eq current-val val) | |
434 (if (stringp current-val) | |
435 (funcall (char-table-extra-slot table 1) char current-val table)) | |
436 (aset table char val)))) | |
437 | |
438 ;; Store VAL (integer) as a character property value of CHAR in TABLE. | |
439 | |
440 (defun unidata-put-integer (char val table) | |
441 (or (integerp val) | |
442 (not val) | |
443 (error "Not an integer nor nil: %S" val)) | |
444 (let ((current-val (aref table char))) | |
445 (unless (eq current-val val) | |
446 (if (stringp current-val) | |
447 (funcall (char-table-extra-slot table 1) char current-val table)) | |
448 (aset table char val)))) | |
449 | |
100093
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
450 ;; Store VAL (integer or float) as a character property value of CHAR |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
451 ;; in TABLE. |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
452 |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
453 (defun unidata-put-numeric (char val table) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
454 (or (numberp val) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
455 (not val) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
456 (error "Not a number nor nil: %S" val)) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
457 (let ((current-val (aref table char))) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
458 (unless (equal current-val val) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
459 (if (stringp current-val) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
460 (funcall (char-table-extra-slot table 1) char current-val table)) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
461 (aset table char val)))) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
462 |
90086 | 463 ;; Encode the character property value VAL into an integer value by |
464 ;; VAL-LIST. By side effect, VAL-LIST is modified. | |
465 ;; VAL-LIST has this form: | |
466 ;; (t (VAL1 . VAL-CODE1) (VAL2 . VAL-CODE2) ...) | |
467 ;; If VAL is one of VALn, just return VAL-CODEn. Otherwise, | |
468 ;; VAL-LIST is modified to this: | |
469 ;; (t (VAL . (1+ VAL-CODE1)) (VAL1 . VAL-CODE1) (VAL2 . VAL-CODE2) ...) | |
470 | |
471 (defun unidata-encode-val (val-list val) | |
100093
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
472 (let ((slot (assoc val val-list)) |
90086 | 473 val-code) |
474 (if slot | |
475 (cdr slot) | |
476 (setq val-code (if (cdr val-list) (1+ (cdr (nth 1 val-list))) 1)) | |
477 (setcdr val-list (cons (cons val val-code) (cdr val-list))) | |
478 val-code))) | |
479 | |
480 ;; Generate a char-table for the character property PROP. | |
481 | |
482 (defun unidata-gen-table (prop val-func default-value) | |
483 (let ((table (make-char-table 'char-code-property-table)) | |
484 (prop-idx (unidata-prop-index prop)) | |
485 (val-list (list t)) | |
486 (vec (make-vector 128 0)) | |
105873
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
487 tail elt range val val-code idx slot |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
488 prev-range-data) |
90086 | 489 (set-char-table-range table (cons 0 (max-char)) default-value) |
490 (setq tail unidata-list) | |
491 (while tail | |
492 (setq elt (car tail) tail (cdr tail)) | |
493 (setq range (car elt) | |
494 val (funcall val-func (nth prop-idx elt))) | |
495 (setq val-code (if val (unidata-encode-val val-list val))) | |
496 (if (consp range) | |
105873
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
497 (when val-code |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
498 (set-char-table-range table range val) |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
499 (let ((from (car range)) (to (cdr range))) |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
500 ;; If RANGE doesn't end at the char-table boundary (each |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
501 ;; 128 characters), we may have to carry over the data |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
502 ;; for the last several characters (at most 127 chars) |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
503 ;; to the next loop. In that case, set PREV-RANGE-DATA |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
504 ;; to ((FROM . TO) . VAL-CODE) where (FROM . TO) |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
505 ;; specifies the range of characters handled in the next |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
506 ;; loop. |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
507 (when (< (logand to #x7F) #x7F) |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
508 (if (< from (logand to #x1FFF80)) |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
509 (setq from (logand to #x1FFF80))) |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
510 (setq prev-range-data (cons (cons from to) val-code))))) |
90086 | 511 (let* ((start (lsh (lsh range -7) 7)) |
512 (limit (+ start 127)) | |
513 str count new-val) | |
514 (fillarray vec 0) | |
105873
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
515 ;; See the comment above. |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
516 (when (and prev-range-data |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
517 (>= (cdr (car prev-range-data)) start)) |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
518 (let ((from (car (car prev-range-data))) |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
519 (to (cdr (car prev-range-data))) |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
520 (vcode (cdr prev-range-data))) |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
521 (while (<= from to) |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
522 (aset vec (- from start) vcode) |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
523 (setq from (1+ from))))) |
5626ccae11ed
(unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents:
103310
diff
changeset
|
524 (setq prev-range-data nil) |
90086 | 525 (if val-code |
526 (aset vec (- range start) val-code)) | |
527 (while (and (setq elt (car tail) range (car elt)) | |
528 (integerp range) | |
529 (<= range limit)) | |
530 (setq new-val (funcall val-func (nth prop-idx elt))) | |
531 (if (not (eq val new-val)) | |
532 (setq val new-val | |
533 val-code (if val (unidata-encode-val val-list val)))) | |
534 (if val-code | |
535 (aset vec (- range start) val-code)) | |
536 (setq tail (cdr tail))) | |
537 (setq str "" val-code -1 count 0) | |
538 (mapc #'(lambda (x) | |
539 (if (= val-code x) | |
540 (setq count (1+ count)) | |
541 (if (> count 2) | |
542 (setq str (concat str (string val-code | |
543 (+ count 128)))) | |
544 (if (= count 2) | |
545 (setq str (concat str (string val-code val-code))) | |
546 (if (= count 1) | |
547 (setq str (concat str (string val-code)))))) | |
548 (setq val-code x count 1))) | |
549 vec) | |
550 (if (= count 128) | |
551 (if val | |
552 (set-char-table-range table (cons start limit) val)) | |
553 (if (= val-code 0) | |
554 (set-char-table-range table (cons start limit) str) | |
555 (if (> count 2) | |
556 (setq str (concat str (string val-code (+ count 128)))) | |
557 (if (= count 2) | |
558 (setq str (concat str (string val-code val-code))) | |
559 (setq str (concat str (string val-code))))) | |
560 (set-char-table-range table (cons start limit) str)))))) | |
561 | |
562 (setq val-list (nreverse (cdr val-list))) | |
94071
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
563 (set-char-table-extra-slot table 0 prop) |
90086 | 564 (set-char-table-extra-slot table 4 (vconcat (mapcar 'car val-list))) |
565 table)) | |
566 | |
567 (defun unidata-gen-table-symbol (prop) | |
568 (let ((table (unidata-gen-table prop | |
569 #'(lambda (x) (and (> (length x) 0) | |
570 (intern x))) | |
571 0))) | |
572 (byte-compile 'unidata-get-symbol) | |
573 (byte-compile 'unidata-put-symbol) | |
574 (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-symbol)) | |
575 (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-symbol)) | |
576 table)) | |
577 | |
578 (defun unidata-gen-table-integer (prop) | |
579 (let ((table (unidata-gen-table prop | |
580 #'(lambda (x) (and (> (length x) 0) | |
581 (string-to-number x))) | |
582 t))) | |
583 (byte-compile 'unidata-get-integer) | |
584 (byte-compile 'unidata-put-integer) | |
585 (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-integer)) | |
586 (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-integer)) | |
587 table)) | |
588 | |
100093
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
589 (defun unidata-gen-table-numeric (prop) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
590 (let ((table (unidata-gen-table prop |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
591 #'(lambda (x) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
592 (if (string-match "/" x) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
593 (/ (float (string-to-number x)) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
594 (string-to-number |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
595 (substring x (match-end 0)))) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
596 (if (> (length x) 0) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
597 (string-to-number x)))) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
598 t))) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
599 (byte-compile 'unidata-get-numeric) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
600 (byte-compile 'unidata-put-numeric) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
601 (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-numeric)) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
602 (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-numeric)) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
603 table)) |
eae5b6774936
(unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents:
94829
diff
changeset
|
604 |
90086 | 605 |
606 ;; WORD-LIST TABLE | |
607 | |
608 ;; If the table is for `name' property, each character in the string | |
609 ;; is one of these: | |
610 ;; DIFF-HEAD-CODE (0, 1, or 2): | |
611 ;; specifies how to decode the following characters. | |
612 ;; WORD-CODE (3..#x7FF excluding '-', '0'..'9', 'A'..'Z'): | |
613 ;; specifies an index number into WORD-TABLE (see below) | |
614 ;; Otherwise (' ', '-', '0'..'9', 'A'..'Z'): | |
615 ;; specifies a literal word. | |
616 ;; | |
617 ;; The 4th slots is a vector: | |
618 ;; [ WORD-TABLE BLOCK-NAME HANGUL-JAMO-TABLE ] | |
619 ;; WORD-TABLE is a vector of word symbols. | |
620 ;; BLOCK-NAME is a vector of name symbols for a block of characters. | |
621 ;; HANGUL-JAMO-TABLE is `unidata-name-jamo-name-table'. | |
622 | |
623 ;; Return the difference of symbol list L1 and L2 in this form: | |
624 ;; (DIFF-HEAD SYM1 SYM2 ...) | |
625 ;; DIFF-HEAD is ((SAME-HEAD-LENGTH * 16) + SAME-TAIL-LENGTH). | |
626 ;; Ex: If L1 is (a b c d e f) and L2 is (a g h e f), this function | |
627 ;; returns ((+ (* 1 16) 2) g h). | |
628 ;; It means that we can get L2 from L1 by prepending the first element | |
629 ;; of L1 and appending the last 2 elements of L1 to the list (g h). | |
630 ;; If L1 and L2 don't have common elements at the head and tail, | |
631 ;; set DIFF-HEAD to -1 and SYM1 ... to the elements of L2. | |
632 | |
633 (defun unidata-word-list-diff (l1 l2) | |
634 (let ((beg 0) | |
635 (end 0) | |
636 (len1 (length l1)) | |
637 (len2 (length l2)) | |
638 result) | |
639 (when (< len1 16) | |
640 (while (and l1 (eq (car l1) (car l2))) | |
641 (setq beg (1+ beg) | |
642 l1 (cdr l1) len1 (1- len1) l2 (cdr l2) len2 (1- len2))) | |
94071
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
643 (while (and (< end len1) (< end len2) |
90086 | 644 (eq (nth (- len1 end 1) l1) (nth (- len2 end 1) l2))) |
645 (setq end (1+ end)))) | |
646 (if (= (+ beg end) 0) | |
647 (setq result (list -1)) | |
648 (setq result (list (+ (* beg 16) (+ beg (- len1 end)))))) | |
649 (while (< end len2) | |
650 (setcdr result (cons (nth (- len2 end 1) l2) (cdr result))) | |
651 (setq end (1+ end))) | |
652 result)) | |
653 | |
654 ;; Return a compressed form of the vector VEC. Each element of VEC is | |
655 ;; a list of symbols of which names can be concatenated to form a | |
656 ;; character name. This function changes those elements into | |
657 ;; compressed forms by utilizing the fact that diff of consecutive | |
658 ;; elements is usually small. | |
659 | |
660 (defun unidata-word-list-compress (vec) | |
661 (let (last-elt last-idx diff-head tail elt val) | |
662 (dotimes (i 128) | |
663 (setq elt (aref vec i)) | |
664 (when elt | |
665 (if (null last-elt) | |
666 (setq diff-head -1 | |
667 val (cons 0 elt)) | |
668 (setq val (unidata-word-list-diff last-elt elt)) | |
669 (if (= (car val) -1) | |
670 (setq diff-head -1 | |
671 val (cons 0 (cdr val))) | |
672 (if (eq diff-head (car val)) | |
673 (setq val (cons 2 (cdr val))) | |
674 (setq diff-head (car val)) | |
675 (if (>= diff-head 0) | |
676 (setq val (cons 1 val)))))) | |
677 (aset vec i val) | |
678 (setq last-idx i last-elt elt))) | |
679 (if (not last-idx) | |
680 (setq vec nil) | |
681 (if (< last-idx 127) | |
682 (let ((shorter (make-vector (1+ last-idx) nil))) | |
683 (dotimes (i (1+ last-idx)) | |
684 (aset shorter i (aref vec i))) | |
685 (setq vec shorter)))) | |
686 vec)) | |
687 | |
688 ;; Encode the word index IDX into a characters code that can be | |
689 ;; embedded in a string. | |
690 | |
691 (defsubst unidata-encode-word (idx) | |
692 ;; Exclude 0, 1, 2. | |
693 (+ idx 3)) | |
694 | |
695 ;; Decode the character code CODE (that is embedded in a string) into | |
696 ;; the corresponding word name by looking up WORD-TABLE. | |
697 | |
698 (defsubst unidata-decode-word (code word-table) | |
699 (setq code (- code 3)) | |
700 (if (< code (length word-table)) | |
701 (aref word-table code))) | |
702 | |
703 ;; Table of short transliterated name symbols of Hangul Jamo divided | |
704 ;; into Choseong, Jungseong, and Jongseong. | |
705 | |
706 (defconst unidata-name-jamo-name-table | |
707 [[G GG N D DD R M B BB S SS nil J JJ C K T P H] | |
708 [A AE YA YAE EO E YEO YE O WA WAE OE YO U WEO WE WI YU EU YI I] | |
709 [G GG GS N NJ NH D L LG LM LB LS LT LP LH M B BS S SS NG J C K T P H]]) | |
710 | |
711 ;; Return a name of CHAR. VAL is the current value of (aref TABLE | |
712 ;; CHAR). | |
713 | |
714 (defun unidata-get-name (char val table) | |
94071
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
715 (cond |
90086 | 716 ((stringp val) |
717 (if (> (aref val 0) 0) | |
718 val | |
719 (let* ((first-char (lsh (lsh char -7) 7)) | |
720 (word-table (aref (char-table-extra-slot table 4) 0)) | |
721 (i 1) | |
722 (len (length val)) | |
723 (vec (make-vector 128 nil)) | |
724 (idx 0) | |
725 (case-fold-search nil) | |
726 c word-list tail-list last-list word diff-head) | |
727 (while (< i len) | |
728 (setq c (aref val i)) | |
729 (if (< c 3) | |
730 (progn | |
731 (if (or word-list tail-list) | |
732 (aset vec idx | |
733 (setq last-list (nconc word-list tail-list)))) | |
734 (setq i (1+ i) idx (1+ idx) | |
735 word-list nil tail-list nil) | |
736 (if (> c 0) | |
737 (let ((l last-list)) | |
738 (if (= c 1) | |
739 (setq diff-head | |
740 (prog1 (aref val i) (setq i (1+ i))))) | |
741 (setq tail-list (nthcdr (% diff-head 16) last-list)) | |
742 (dotimes (i (/ diff-head 16)) | |
743 (setq word-list (nconc word-list (list (car l))) | |
744 l (cdr l)))))) | |
745 (setq word-list | |
94071
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
746 (nconc word-list |
90086 | 747 (list (symbol-name |
748 (unidata-decode-word c word-table)))) | |
749 i (1+ i)))) | |
750 (if (or word-list tail-list) | |
751 (aset vec idx (nconc word-list tail-list))) | |
752 (setq val nil) | |
753 (dotimes (i 128) | |
754 (setq c (+ first-char i)) | |
755 (let ((name (aref vec i))) | |
756 (if name | |
757 (let ((tail (cdr (setq name (copy-sequence name)))) | |
758 elt) | |
759 (while tail | |
760 (setq elt (car tail)) | |
761 (or (string= elt "-") | |
762 (progn | |
763 (setcdr tail (cons elt (cdr tail))) | |
764 (setcar tail " "))) | |
765 (setq tail (cddr tail))) | |
766 (setq name (apply 'concat name)))) | |
767 (aset table c name) | |
768 (if (= c char) | |
769 (setq val name)))) | |
770 val))) | |
771 | |
772 ((and (integerp val) (> val 0)) | |
773 (let* ((symbol-table (aref (char-table-extra-slot table 4) 1)) | |
774 (sym (aref symbol-table (1- val)))) | |
775 (cond ((eq sym 'HANGUL\ SYLLABLE) | |
776 (let ((jamo-name-table (aref (char-table-extra-slot table 4) 2))) | |
777 ;; SIndex = S - SBase | |
778 (setq char (- char #xAC00)) | |
779 (let ( ;; LIndex = SIndex / NCount | |
780 (L (/ char 588)) | |
781 ;; VIndex = (SIndex % NCount) * TCount | |
782 (V (/ (% char 588) 28)) | |
783 ;; TIndex = SIndex % TCount | |
784 (T (% char 28))) | |
94071
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
785 (format "HANGUL SYLLABLE %s%s%s" |
90112
eb1b00df002b
(unidata-get-name): Handle U+110B.
Kenichi Handa <handa@m17n.org>
parents:
90109
diff
changeset
|
786 ;; U+110B is nil in this table. |
eb1b00df002b
(unidata-get-name): Handle U+110B.
Kenichi Handa <handa@m17n.org>
parents:
90109
diff
changeset
|
787 (or (aref (aref jamo-name-table 0) L) "") |
90086 | 788 (aref (aref jamo-name-table 1) V) |
789 (if (= T 0) "" | |
790 (aref (aref jamo-name-table 2) (1- T))))))) | |
90109
790c49ba39db
(unidata-get-name): Handle "CJK IDEOGRAPH".
Kenichi Handa <handa@m17n.org>
parents:
90097
diff
changeset
|
791 ((eq sym 'CJK\ IDEOGRAPH) |
790c49ba39db
(unidata-get-name): Handle "CJK IDEOGRAPH".
Kenichi Handa <handa@m17n.org>
parents:
90097
diff
changeset
|
792 (format "%s-%04X" sym char)) |
90086 | 793 ((eq sym 'CJK\ COMPATIBILITY\ IDEOGRAPH) |
794 (format "%s-%04X" sym char)) | |
795 ((eq sym 'VARIATION\ SELECTOR) | |
796 (format "%s-%d" sym (+ (- char #xe0100) 17)))))))) | |
797 | |
798 ;; Store VAL as the name of CHAR in TABLE. | |
799 | |
800 (defun unidata-put-name (char val table) | |
801 (let ((current-val (aref table char))) | |
802 (if (and (stringp current-val) (= (aref current-val 0) 0)) | |
803 (funcall (char-table-extra-slot table 1) char current-val table)) | |
804 (aset table char val))) | |
805 | |
806 (defun unidata-get-decomposition (char val table) | |
807 (cond | |
808 ((consp val) | |
809 val) | |
810 | |
811 ((stringp val) | |
812 (if (> (aref val 0) 0) | |
813 val | |
814 (let* ((first-char (lsh (lsh char -7) 7)) | |
815 (word-table (char-table-extra-slot table 4)) | |
816 (i 1) | |
817 (len (length val)) | |
818 (vec (make-vector 128 nil)) | |
819 (idx 0) | |
820 (case-fold-search nil) | |
821 c word-list tail-list last-list word diff-head) | |
822 (while (< i len) | |
823 (setq c (aref val i)) | |
824 (if (< c 3) | |
825 (progn | |
826 (if (or word-list tail-list) | |
827 (aset vec idx | |
828 (setq last-list (nconc word-list tail-list)))) | |
829 (setq i (1+ i) idx (1+ idx) | |
830 word-list nil tail-list nil) | |
831 (if (> c 0) | |
832 (let ((l last-list)) | |
833 (if (= c 1) | |
834 (setq diff-head | |
835 (prog1 (aref val i) (setq i (1+ i))))) | |
836 (setq tail-list (nthcdr (% diff-head 16) last-list)) | |
837 (dotimes (i (/ diff-head 16)) | |
838 (setq word-list (nconc word-list (list (car l))) | |
839 l (cdr l)))))) | |
840 (setq word-list | |
94071
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
841 (nconc word-list |
90086 | 842 (list (or (unidata-decode-word c word-table) c))) |
843 i (1+ i)))) | |
844 (if (or word-list tail-list) | |
845 (aset vec idx (nconc word-list tail-list))) | |
846 (dotimes (i 128) | |
847 (aset table (+ first-char i) (aref vec i))) | |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
848 (aref vec (- char first-char))))) |
90086 | 849 |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
850 ;; Hangul syllable |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
851 ((and (eq val 0) (>= char #xAC00) (<= char #xD7A3)) |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
852 ;; SIndex = S (char) - SBase (#xAC00) |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
853 (setq char (- char #xAC00)) |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
854 (let (;; L = LBase + SIndex / NCount |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
855 (L (+ #x1100 (/ char 588))) |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
856 ;; V = VBase + (SIndex % NCount) * TCount |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
857 (V (+ #x1161 (/ (% char 588) 28))) |
103033
0dd3b08296b6
(unidata-get-decomposition): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents:
103009
diff
changeset
|
858 ;; LV = SBase + (SIndex / TCount) * TCount |
0dd3b08296b6
(unidata-get-decomposition): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents:
103009
diff
changeset
|
859 (LV (+ #xAC00 (* (/ char 28) 28))) |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
860 ;; T = TBase + SIndex % TCount |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
861 (T (+ #x11A7 (% char 28)))) |
90176
dfbe37c15be3
(unidata-get-decomposition): For Hangul
Kenichi Handa <handa@m17n.org>
parents:
90174
diff
changeset
|
862 (if (= T #x11A7) |
dfbe37c15be3
(unidata-get-decomposition): For Hangul
Kenichi Handa <handa@m17n.org>
parents:
90174
diff
changeset
|
863 (list L V) |
103009
96e8e1d84170
(unidata-get-decomposition): Adjust Hangle decomposition rule to
Kenichi Handa <handa@m17n.org>
parents:
102901
diff
changeset
|
864 (list LV T)))) |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
865 |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
866 )) |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
867 |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
868 ;; Store VAL as the decomposition information of CHAR in TABLE. |
90086 | 869 |
870 (defun unidata-put-decomposition (char val table) | |
871 (let ((current-val (aref table char))) | |
872 (if (and (stringp current-val) (= (aref current-val 0) 0)) | |
873 (funcall (char-table-extra-slot table 1) char current-val table)) | |
874 (aset table char val))) | |
875 | |
876 ;; UnicodeData.txt contains these lines: | |
877 ;; 0000;<control>;Cc;0;BN;;;;;N;NULL;;;; | |
878 ;; ... | |
879 ;; 0020;SPACE;Zs;0;WS;;;;;N;;;;; | |
880 ;; ... | |
881 ;; The following command yields a file of about 96K bytes. | |
882 ;; % gawk -F ';' '{print $1,$2;}' < UnicodeData.txt | gzip > temp.gz | |
883 ;; With the following function, we can get a file of almost the same | |
884 ;; the size. | |
885 | |
886 ;; Generate a char-table for character names. | |
887 | |
888 (defun unidata-gen-table-word-list (prop val-func) | |
889 (let ((table (make-char-table 'char-code-property-table)) | |
890 (prop-idx (unidata-prop-index prop)) | |
891 (word-list (list nil)) | |
892 word-table | |
893 block-list block-word-table block-end | |
894 tail elt range val idx slot) | |
895 (set-char-table-range table (cons 0 (max-char)) 0) | |
896 (setq tail unidata-list) | |
897 (setq block-end -1) | |
898 (while tail | |
899 (setq elt (car tail) tail (cdr tail)) | |
900 (setq range (car elt) | |
901 val (funcall val-func (nth prop-idx elt))) | |
902 ;; Treat the sequence of "CJK COMPATIBILITY IDEOGRAPH-XXXX" and | |
903 ;; "VARIATION SELECTOR-XXX" as a block. | |
904 (if (and (consp val) (eq prop 'name) | |
905 (or (and (eq (car val) 'CJK) | |
906 (eq (nth 1 val) 'COMPATIBILITY)) | |
907 (and (>= range #xe0100) | |
908 (eq (car val) 'VARIATION) | |
909 (eq (nth 1 val) 'SELECTOR)))) | |
910 (let ((first (car val)) | |
911 (second (nth 1 val)) | |
912 (start range)) | |
913 (while (and (setq elt (car tail) range (car elt) | |
914 val (funcall val-func (nth prop-idx elt))) | |
915 (consp val) | |
916 (eq first (car val)) | |
917 (eq second (nth 1 val))) | |
918 (setq block-end range | |
919 tail (cdr tail))) | |
920 (setq range (cons start block-end) | |
921 val (if (eq first 'CJK) 'CJK\ COMPATIBILITY\ IDEOGRAPH | |
922 'VARIATION\ SELECTOR)))) | |
923 | |
924 (if (consp range) | |
925 (if val | |
926 (let ((slot (assq val block-list))) | |
927 (setq range (cons (car range) (cdr range))) | |
928 (setq block-end (cdr range)) | |
929 (if slot | |
930 (nconc slot (list range)) | |
931 (push (list val range) block-list)))) | |
932 (let* ((start (lsh (lsh range -7) 7)) | |
933 (limit (+ start 127)) | |
934 (first tail) | |
935 (vec (make-vector 128 nil)) | |
936 c name len) | |
937 (if (<= start block-end) | |
938 ;; START overlap with the previous block. | |
939 (aset table range (nth prop-idx elt)) | |
940 (if val | |
941 (aset vec (- range start) val)) | |
942 (while (and (setq elt (car tail) range (car elt)) | |
943 (integerp range) | |
944 (<= range limit)) | |
945 (setq val (funcall val-func (nth prop-idx elt))) | |
946 (if val | |
947 (aset vec (- range start) val)) | |
948 (setq tail (cdr tail))) | |
949 (setq vec (unidata-word-list-compress vec)) | |
950 (when vec | |
951 (dotimes (i (length vec)) | |
952 (dolist (elt (aref vec i)) | |
953 (if (symbolp elt) | |
954 (let ((slot (assq elt word-list))) | |
955 (if slot | |
956 (setcdr slot (1+ (cdr slot))) | |
957 (setcdr word-list | |
958 (cons (cons elt 1) (cdr word-list)))))))) | |
959 (set-char-table-range table (cons start limit) vec)))))) | |
960 (setq word-list (sort (cdr word-list) | |
961 #'(lambda (x y) (> (cdr x) (cdr y))))) | |
962 (setq tail word-list idx 0) | |
963 (while tail | |
964 (setcdr (car tail) (unidata-encode-word idx)) | |
965 (setq idx (1+ idx) tail (cdr tail))) | |
966 (setq word-table (make-vector (length word-list) nil)) | |
967 (setq idx 0) | |
968 (dolist (elt word-list) | |
969 (aset word-table idx (car elt)) | |
970 (setq idx (1+ idx))) | |
971 | |
972 (if (and (eq prop 'decomposition) | |
973 (> idx 32)) | |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
974 (error "Too many symbols in decomposition data")) |
90086 | 975 |
976 (dotimes (i (/ #x110000 128)) | |
977 (let* ((idx (* i 128)) | |
978 (vec (aref table idx))) | |
979 (when (vectorp vec) | |
980 (dotimes (i (length vec)) | |
981 (let ((tail (aref vec i)) | |
982 elt code) | |
983 (if (not tail) | |
984 (aset vec i "\0") | |
985 (while tail | |
986 (setq elt (car tail) | |
987 code (if (integerp elt) elt | |
988 (cdr (assq elt word-list)))) | |
989 (setcar tail (string code)) | |
990 (setq tail (cdr tail))) | |
991 (aset vec i (mapconcat 'identity (aref vec i) ""))))) | |
992 (set-char-table-range | |
993 table (cons idx (+ idx 127)) | |
994 (mapconcat 'identity vec ""))))) | |
995 | |
996 (setq block-word-table (make-vector (length block-list) nil)) | |
997 (setq idx 0) | |
998 (dolist (elt block-list) | |
999 (dolist (e (cdr elt)) | |
1000 (set-char-table-range table e (1+ idx))) | |
1001 (aset block-word-table idx (car elt)) | |
1002 (setq idx (1+ idx))) | |
1003 | |
1004 (set-char-table-extra-slot table 0 prop) | |
1005 (set-char-table-extra-slot table 4 (cons word-table block-word-table)) | |
1006 table)) | |
1007 | |
1008 (defun unidata-split-name (str) | |
1009 (if (symbolp str) | |
1010 str | |
1011 (let ((len (length str)) | |
1012 (l nil) | |
1013 (idx 0) | |
1014 c) | |
1015 (if (= len 0) | |
1016 nil | |
1017 (dotimes (i len) | |
1018 (setq c (aref str i)) | |
1019 (if (= c 32) | |
1020 (setq l (cons (intern (substring str idx i)) l) | |
1021 idx (1+ i)) | |
94071
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
1022 (if (and (= c ?-) (< idx i) |
90086 | 1023 (< (1+ i) len) (/= (aref str (1+ i)) 32)) |
1024 (setq l (cons '- (cons (intern (substring str idx i)) l)) | |
1025 idx (1+ i))))) | |
1026 (nreverse (cons (intern (substring str idx)) l)))))) | |
1027 | |
1028 (defun unidata-gen-table-name (prop) | |
1029 (let* ((table (unidata-gen-table-word-list prop 'unidata-split-name)) | |
1030 (word-tables (char-table-extra-slot table 4))) | |
1031 (byte-compile 'unidata-get-name) | |
1032 (byte-compile 'unidata-put-name) | |
1033 (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-name)) | |
1034 (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-name)) | |
1035 | |
1036 (if (eq prop 'name) | |
1037 (set-char-table-extra-slot table 4 | |
1038 (vector (car word-tables) | |
1039 (cdr word-tables) | |
1040 unidata-name-jamo-name-table)) | |
1041 (set-char-table-extra-slot table 4 | |
1042 (vector (car word-tables)))) | |
1043 table)) | |
1044 | |
1045 (defun unidata-split-decomposition (str) | |
1046 (if (symbolp str) | |
1047 str | |
1048 (let ((len (length str)) | |
1049 (l nil) | |
1050 (idx 0) | |
1051 c) | |
1052 (if (= len 0) | |
1053 nil | |
1054 (dotimes (i len) | |
1055 (setq c (aref str i)) | |
1056 (if (= c 32) | |
1057 (setq l (if (= (aref str idx) ?<) | |
100107
eff1b0128211
(unidata-prop-alist): Docstring for
Kenichi Handa <handa@m17n.org>
parents:
100094
diff
changeset
|
1058 (cons (intern (substring str (1+ idx) (1- i))) l) |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
1059 (cons (string-to-number (substring str idx i) 16) l)) |
90086 | 1060 idx (1+ i)))) |
1061 (if (= (aref str idx) ?<) | |
100107
eff1b0128211
(unidata-prop-alist): Docstring for
Kenichi Handa <handa@m17n.org>
parents:
100094
diff
changeset
|
1062 (setq l (cons (intern (substring str (1+ idx) (1- len))) l)) |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
1063 (setq l (cons (string-to-number (substring str idx len) 16) l))) |
90086 | 1064 (nreverse l))))) |
1065 | |
1066 | |
1067 (defun unidata-gen-table-decomposition (prop) | |
1068 (let* ((table (unidata-gen-table-word-list prop 'unidata-split-decomposition)) | |
1069 (word-tables (char-table-extra-slot table 4))) | |
1070 (byte-compile 'unidata-get-decomposition) | |
1071 (byte-compile 'unidata-put-decomposition) | |
1072 (set-char-table-extra-slot table 1 | |
1073 (symbol-function 'unidata-get-decomposition)) | |
94071
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
1074 (set-char-table-extra-slot table 2 |
90086 | 1075 (symbol-function 'unidata-put-decomposition)) |
1076 (set-char-table-extra-slot table 4 (car word-tables)) | |
1077 table)) | |
1078 | |
1079 | |
1080 | |
1081 (defun unidata-describe-general-category (val) | |
1082 (cdr (assq val | |
1083 '((Lu . "Letter, Uppercase") | |
1084 (Ll . "Letter, Lowercase") | |
1085 (Lt . "Letter, Titlecase") | |
1086 (Lm . "Letter, Modifier") | |
1087 (Lo . "Letter, Other") | |
1088 (Mn . "Mark, Nonspacing") | |
1089 (Mc . "Mark, Spacing Combining") | |
1090 (Me . "Mark, Enclosing") | |
1091 (Nd . "Number, Decimal Digit") | |
1092 (Nl . "Number, Letter") | |
1093 (No . "Number, Other") | |
1094 (Pc . "Punctuation, Connector") | |
1095 (Pd . "Punctuation, Dash") | |
1096 (Ps . "Punctuation, Open") | |
1097 (Pe . "Punctuation, Close") | |
1098 (Pi . "Punctuation, Initial quote") | |
1099 (Pf . "Punctuation, Final quote") | |
1100 (Po . "Punctuation, Other") | |
1101 (Sm . "Symbol, Math") | |
1102 (Sc . "Symbol, Currency") | |
1103 (Sk . "Symbol, Modifier") | |
1104 (So . "Symbol, Other") | |
1105 (Zs . "Separator, Space") | |
1106 (Zl . "Separator, Line") | |
1107 (Zp . "Separator, Paragraph") | |
1108 (Cc . "Other, Control") | |
1109 (Cf . "Other, Format") | |
1110 (Cs . "Other, Surrogate") | |
1111 (Co . "Other, Private Use") | |
1112 (Cn . "Other, Not Assigned"))))) | |
1113 | |
1114 (defun unidata-describe-canonical-combining-class (val) | |
1115 (cdr (assq val | |
1116 '((0 . "Spacing, split, enclosing, reordrant, and Tibetan subjoined") | |
1117 (1 . "Overlays and interior") | |
1118 (7 . "Nuktas") | |
1119 (8 . "Hiragana/Katakana voicing marks") | |
1120 (9 . "Viramas") | |
1121 (10 . "Start of fixed position classes") | |
1122 (199 . "End of fixed position classes") | |
1123 (200 . "Below left attached") | |
1124 (202 . "Below attached") | |
1125 (204 . "Below right attached") | |
1126 (208 . "Left attached (reordrant around single base character)") | |
1127 (210 . "Right attached") | |
1128 (212 . "Above left attached") | |
1129 (214 . "Above attached") | |
1130 (216 . "Above right attached") | |
1131 (218 . "Below left") | |
1132 (220 . "Below") | |
1133 (222 . "Below right") | |
1134 (224 . "Left (reordrant around single base character)") | |
1135 (226 . "Right") | |
1136 (228 . "Above left") | |
1137 (230 . "Above") | |
1138 (232 . "Above right") | |
1139 (233 . "Double below") | |
1140 (234 . "Double above") | |
1141 (240 . "Below (iota subscript)"))))) | |
1142 | |
1143 (defun unidata-describe-bidi-class (val) | |
1144 (cdr (assq val | |
1145 '((L . "Left-to-Right") | |
1146 (LRE . "Left-to-Right Embedding") | |
1147 (LRO . "Left-to-Right Override") | |
1148 (R . "Right-to-Left") | |
1149 (AL . "Right-to-Left Arabic") | |
1150 (RLE . "Right-to-Left Embedding") | |
1151 (RLO . "Right-to-Left Override") | |
1152 (PDF . "Pop Directional Format") | |
1153 (EN . "European Number") | |
1154 (ES . "European Number Separator") | |
1155 (ET . "European Number Terminator") | |
1156 (AN . "Arabic Number") | |
1157 (CS . "Common Number Separator") | |
1158 (NSM . "Non-Spacing Mark") | |
1159 (BN . "Boundary Neutral") | |
1160 (B . "Paragraph Separator") | |
1161 (S . "Segment Separator") | |
1162 (WS . "Whitespace") | |
1163 (ON . "Other Neutrals"))))) | |
1164 | |
1165 (defun unidata-describe-decomposition (val) | |
102901
3c5ac8f47c5d
(unidata-describe-decomposition): Return
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
1166 (mapconcat |
3c5ac8f47c5d
(unidata-describe-decomposition): Return
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
1167 #'(lambda (x) |
3c5ac8f47c5d
(unidata-describe-decomposition): Return
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
1168 (if (symbolp x) (symbol-name x) |
3c5ac8f47c5d
(unidata-describe-decomposition): Return
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
1169 (concat (string ?') |
3c5ac8f47c5d
(unidata-describe-decomposition): Return
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
1170 (compose-string (string x) 0 1 (string ?\t x ?\t)) |
3c5ac8f47c5d
(unidata-describe-decomposition): Return
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
1171 (string ?')))) |
3c5ac8f47c5d
(unidata-describe-decomposition): Return
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
1172 val " ")) |
90086 | 1173 |
1174 ;; Verify if we can retrieve correct values from the generated | |
1175 ;; char-tables. | |
1176 | |
1177 (defun unidata-check () | |
1178 (dolist (elt unidata-prop-alist) | |
1179 (let* ((prop (car elt)) | |
1180 (index (unidata-prop-index prop)) | |
1181 (generator (unidata-prop-generator prop)) | |
94071
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
1182 (table (progn |
90086 | 1183 (message "Generating %S table..." prop) |
1184 (funcall generator prop))) | |
1185 (decoder (char-table-extra-slot table 1)) | |
1186 (check #x400)) | |
1187 (dolist (e unidata-list) | |
1188 (let ((char (car e)) | |
1189 (val1 (nth index e)) | |
1190 val2) | |
1191 (if (and (stringp val1) (= (length val1) 0)) | |
1192 (setq val1 nil)) | |
1193 (unless (consp char) | |
1194 (setq val2 (funcall decoder char (aref table char) table)) | |
1195 (if val1 | |
1196 (cond ((eq generator 'unidata-gen-table-symbol) | |
1197 (setq val1 (intern val1))) | |
1198 ((eq generator 'unidata-gen-table-integer) | |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
1199 (setq val1 (string-to-number val1))) |
90086 | 1200 ((eq generator 'unidata-gen-table-character) |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
1201 (setq val1 (string-to-number val1 16))) |
90086 | 1202 ((eq generator 'unidata-gen-table-decomposition) |
1203 (setq val1 (unidata-split-decomposition val1))))) | |
1204 (when (>= char check) | |
1205 (message "%S %04X" prop check) | |
1206 (setq check (+ check #x400))) | |
1207 (or (equal val1 val2) | |
94071
03be13c38b12
(unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents:
91421
diff
changeset
|
1208 (insert (format "> %04X %S\n< %04X %S\n" |
90086 | 1209 char val1 char val2))) |
1210 (sit-for 0))))))) | |
1211 | |
1212 ;; The entry function. It generates files described in the header | |
1213 ;; comment of this file. | |
1214 | |
90174
f3973ae57d8b
(unidata-text-file): Delete it.
Kenichi Handa <handa@m17n.org>
parents:
90162
diff
changeset
|
1215 (defun unidata-gen-files (&optional unidata-text-file) |
f3973ae57d8b
(unidata-text-file): Delete it.
Kenichi Handa <handa@m17n.org>
parents:
90162
diff
changeset
|
1216 (or unidata-text-file |
f3973ae57d8b
(unidata-text-file): Delete it.
Kenichi Handa <handa@m17n.org>
parents:
90162
diff
changeset
|
1217 (setq unidata-text-file (car command-line-args-left) |
f3973ae57d8b
(unidata-text-file): Delete it.
Kenichi Handa <handa@m17n.org>
parents:
90162
diff
changeset
|
1218 command-line-args-left (cdr command-line-args-left))) |
f3973ae57d8b
(unidata-text-file): Delete it.
Kenichi Handa <handa@m17n.org>
parents:
90162
diff
changeset
|
1219 (unidata-setup-list unidata-text-file) |
91405
30275283f62d
(unidata-gen-files): Force unix line ends.
Jason Rumney <jasonr@gnu.org>
parents:
90176
diff
changeset
|
1220 (let ((coding-system-for-write 'utf-8-unix) |
90086 | 1221 (charprop-file "charprop.el")) |
1222 (with-temp-file charprop-file | |
1223 (insert ";; Automatically generated by unidata-gen.el.\n") | |
1224 (dolist (elt unidata-prop-alist) | |
1225 (let* ((prop (car elt)) | |
1226 (generator (unidata-prop-generator prop)) | |
1227 (file (unidata-prop-file prop)) | |
1228 (docstring (unidata-prop-docstring prop)) | |
1229 (describer (unidata-prop-describer prop)) | |
1230 table) | |
1231 ;; Filename in this comment line is extracted by sed in | |
1232 ;; Makefile. | |
1233 (insert (format ";; FILE: %s\n" file)) | |
1234 (insert (format "(define-char-code-property '%S %S\n %S)\n" | |
1235 prop file docstring)) | |
1236 (with-temp-file file | |
1237 (message "Generating %s..." file) | |
1238 (setq table (funcall generator prop)) | |
1239 (when describer | |
1240 (unless (subrp (symbol-function describer)) | |
1241 (byte-compile describer) | |
1242 (setq describer (symbol-function describer))) | |
1243 (set-char-table-extra-slot table 3 describer)) | |
112219
294500476da2
Revert accidental update of non-FSF copyright from r99313.
Glenn Morris <rgm@gnu.org>
parents:
112218
diff
changeset
|
1244 (insert ";; Copyright (C) 1991-2009 Unicode, Inc. |
103310
e1613d164d67
Add copyright header.
Chong Yidong <cyd@stupidchicken.com>
parents:
103033
diff
changeset
|
1245 ;; This file was generated from the Unicode data file at |
e1613d164d67
Add copyright header.
Chong Yidong <cyd@stupidchicken.com>
parents:
103033
diff
changeset
|
1246 ;; http://www.unicode.org/Public/UNIDATA/UnicodeData.txt. |
e1613d164d67
Add copyright header.
Chong Yidong <cyd@stupidchicken.com>
parents:
103033
diff
changeset
|
1247 ;; See lisp/international/README for the copyright and permission notice.\n" |
90086 | 1248 (format "(define-char-code-property '%S %S %S)\n" |
1249 prop table docstring) | |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
1250 ";; Local Variables:\n" |
90086 | 1251 ";; coding: utf-8\n" |
1252 ";; no-byte-compile: t\n" | |
1253 ";; End:\n\n" | |
1254 (format ";; %s ends here\n" file))))) | |
1255 (message "Writing %s..." charprop-file) | |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
1256 (insert ";; Local Variables:\n" |
90086 | 1257 ";; coding: utf-8\n" |
1258 ";; no-byte-compile: t\n" | |
1259 ";; End:\n\n" | |
1260 (format ";; %s ends here\n" charprop-file))))) | |
1261 | |
90162
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
1262 |
88ca83396d15
Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents:
90112
diff
changeset
|
1263 |
90086 | 1264 ;;; unidata-gen.el ends here |