1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997,98,99,2000,2001,2002 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 #include <sys/param.h>
26 #include <bits/libc-lock.h>
29 #include <gconv_int.h>
30 #include <gconv_charset.h>
33 /* Simple data structure for alias mapping. We have two names, `from'
35 void *__gconv_alias_db
;
37 /* Array with available modules. */
38 struct gconv_module
*__gconv_modules_db
;
40 /* We modify global data. */
41 __libc_lock_define_initialized (static, lock
)
44 /* Provide access to module database. */
46 __gconv_get_modules_db (void)
48 return __gconv_modules_db
;
52 __gconv_get_alias_db (void)
54 return __gconv_alias_db
;
58 /* Function for searching alias. */
60 __gconv_alias_compare (const void *p1
, const void *p2
)
62 const struct gconv_alias
*s1
= (const struct gconv_alias
*) p1
;
63 const struct gconv_alias
*s2
= (const struct gconv_alias
*) p2
;
64 return strcmp (s1
->fromname
, s2
->fromname
);
68 /* To search for a derivation we create a list of intermediate steps.
69 Each element contains a pointer to the element which precedes it
70 in the derivation order. */
71 struct derivation_step
73 const char *result_set
;
74 size_t result_set_len
;
77 struct gconv_module
*code
;
78 struct derivation_step
*last
;
79 struct derivation_step
*next
;
82 #define NEW_STEP(result, hi, lo, module, last_mod) \
83 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
84 newp->result_set = result; \
85 newp->result_set_len = strlen (result); \
88 newp->code = module; \
89 newp->last = last_mod; \
94 /* If a specific transformation is used more than once we should not need
95 to start looking for it again. Instead cache each successful result. */
96 struct known_derivation
100 struct __gconv_step
*steps
;
104 /* Compare function for database of found derivations. */
106 derivation_compare (const void *p1
, const void *p2
)
108 const struct known_derivation
*s1
= (const struct known_derivation
*) p1
;
109 const struct known_derivation
*s2
= (const struct known_derivation
*) p2
;
112 result
= strcmp (s1
->from
, s2
->from
);
114 result
= strcmp (s1
->to
, s2
->to
);
118 /* The search tree for known derivations. */
119 static void *known_derivations
;
121 /* Look up whether given transformation was already requested before. */
124 derivation_lookup (const char *fromset
, const char *toset
,
125 struct __gconv_step
**handle
, size_t *nsteps
)
127 struct known_derivation key
= { fromset
, toset
, NULL
, 0 };
128 struct known_derivation
**result
;
130 result
= __tfind (&key
, &known_derivations
, derivation_compare
);
133 return __GCONV_NOCONV
;
135 *handle
= (*result
)->steps
;
136 *nsteps
= (*result
)->nsteps
;
138 /* Please note that we return GCONV_OK even if the last search for
139 this transformation was unsuccessful. */
143 /* Add new derivation to list of known ones. */
146 add_derivation (const char *fromset
, const char *toset
,
147 struct __gconv_step
*handle
, size_t nsteps
)
149 struct known_derivation
*new_deriv
;
150 size_t fromset_len
= strlen (fromset
) + 1;
151 size_t toset_len
= strlen (toset
) + 1;
153 new_deriv
= (struct known_derivation
*)
154 malloc (sizeof (struct known_derivation
) + fromset_len
+ toset_len
);
155 if (new_deriv
!= NULL
)
157 new_deriv
->from
= (char *) (new_deriv
+ 1);
158 new_deriv
->to
= memcpy (__mempcpy (new_deriv
+ 1, fromset
, fromset_len
),
161 new_deriv
->steps
= handle
;
162 new_deriv
->nsteps
= nsteps
;
164 if (__tsearch (new_deriv
, &known_derivations
, derivation_compare
)
166 /* There is some kind of memory allocation problem. */
169 /* Please note that we don't complain if the allocation failed. This
170 is not tragically but in case we use the memory debugging facilities
171 not all memory will be freed. */
175 free_derivation (void *p
)
177 struct known_derivation
*deriv
= (struct known_derivation
*) p
;
180 for (cnt
= 0; cnt
< deriv
->nsteps
; ++cnt
)
181 if (deriv
->steps
[cnt
].__counter
> 0
182 && deriv
->steps
[cnt
].__end_fct
!= NULL
)
183 DL_CALL_FCT (deriv
->steps
[cnt
].__end_fct
, (&deriv
->steps
[cnt
]));
185 /* Free the name strings. */
186 free ((char *) deriv
->steps
[0].__from_name
);
187 free ((char *) deriv
->steps
[deriv
->nsteps
- 1].__to_name
);
189 free ((struct __gconv_step
*) deriv
->steps
);
194 /* Decrement the reference count for a single step in a steps array. */
197 __gconv_release_step (struct __gconv_step
*step
)
199 if (--step
->__counter
== 0)
201 /* Call the destructor. */
202 if (step
->__end_fct
!= NULL
)
203 DL_CALL_FCT (step
->__end_fct
, (step
));
206 /* Skip builtin modules; they are not reference counted. */
207 if (step
->__shlib_handle
!= NULL
)
209 /* Release the loaded module. */
210 __gconv_release_shlib (step
->__shlib_handle
);
211 step
->__shlib_handle
= NULL
;
219 gen_steps (struct derivation_step
*best
, const char *toset
,
220 const char *fromset
, struct __gconv_step
**handle
, size_t *nsteps
)
223 struct __gconv_step
*result
;
224 struct derivation_step
*current
;
225 int status
= __GCONV_NOMEM
;
227 /* First determine number of steps. */
228 for (current
= best
; current
->last
!= NULL
; current
= current
->last
)
231 result
= (struct __gconv_step
*) malloc (sizeof (struct __gconv_step
)
240 while (step_cnt
-- > 0)
242 result
[step_cnt
].__from_name
= (step_cnt
== 0
244 : (char *)current
->last
->result_set
);
245 result
[step_cnt
].__to_name
= (step_cnt
+ 1 == *nsteps
246 ? __strdup (current
->result_set
)
247 : result
[step_cnt
+ 1].__from_name
);
249 result
[step_cnt
].__counter
= 1;
250 result
[step_cnt
].__data
= NULL
;
253 if (current
->code
->module_name
[0] == '/')
255 /* Load the module, return handle for it. */
256 struct __gconv_loaded_object
*shlib_handle
=
257 __gconv_find_shlib (current
->code
->module_name
);
259 if (shlib_handle
== NULL
)
265 result
[step_cnt
].__shlib_handle
= shlib_handle
;
266 result
[step_cnt
].__modname
= shlib_handle
->name
;
267 result
[step_cnt
].__fct
= shlib_handle
->fct
;
268 result
[step_cnt
].__init_fct
= shlib_handle
->init_fct
;
269 result
[step_cnt
].__end_fct
= shlib_handle
->end_fct
;
271 /* These settings can be overridden by the init function. */
272 result
[step_cnt
].__btowc_fct
= NULL
;
274 /* Call the init function. */
275 if (result
[step_cnt
].__init_fct
!= NULL
)
277 status
= DL_CALL_FCT (result
[step_cnt
].__init_fct
,
278 (&result
[step_cnt
]));
280 if (__builtin_expect (status
, __GCONV_OK
) != __GCONV_OK
)
283 /* Make sure we unload this modules. */
285 result
[step_cnt
].__end_fct
= NULL
;
292 /* It's a builtin transformation. */
293 __gconv_get_builtin_trans (current
->code
->module_name
,
296 current
= current
->last
;
299 if (__builtin_expect (failed
, 0) != 0)
301 /* Something went wrong while initializing the modules. */
302 while (++step_cnt
< *nsteps
)
303 __gconv_release_step (&result
[step_cnt
]);
307 if (status
== __GCONV_OK
)
308 status
= __GCONV_NOCONV
;
326 increment_counter (struct __gconv_step
*steps
, size_t nsteps
)
328 /* Increment the user counter. */
330 int result
= __GCONV_OK
;
334 struct __gconv_step
*step
= &steps
[cnt
];
336 if (step
->__counter
++ == 0)
338 /* Skip builtin modules. */
339 if (step
->__modname
!= NULL
)
341 /* Reopen a previously used module. */
342 step
->__shlib_handle
= __gconv_find_shlib (step
->__modname
);
343 if (step
->__shlib_handle
== NULL
)
345 /* Oops, this is the second time we use this module
346 (after unloading) and this time loading failed!? */
348 while (++cnt
< nsteps
)
349 __gconv_release_step (&steps
[cnt
]);
350 result
= __GCONV_NOCONV
;
354 /* The function addresses defined by the module may
356 step
->__fct
= step
->__shlib_handle
->fct
;
357 step
->__init_fct
= step
->__shlib_handle
->init_fct
;
358 step
->__end_fct
= step
->__shlib_handle
->end_fct
;
360 /* These settings can be overridden by the init function. */
361 step
->__btowc_fct
= NULL
;
364 /* Call the init function. */
365 if (step
->__init_fct
!= NULL
)
366 DL_CALL_FCT (step
->__init_fct
, (step
));
374 /* The main function: find a possible derivation from the `fromset' (either
375 the given name or the alias) to the `toset' (again with alias). */
378 find_derivation (const char *toset
, const char *toset_expand
,
379 const char *fromset
, const char *fromset_expand
,
380 struct __gconv_step
**handle
, size_t *nsteps
)
382 struct derivation_step
*first
, *current
, **lastp
, *solution
= NULL
;
383 int best_cost_hi
= INT_MAX
;
384 int best_cost_lo
= INT_MAX
;
387 /* Look whether an earlier call to `find_derivation' has already
388 computed a possible derivation. If so, return it immediately. */
389 result
= derivation_lookup (fromset_expand
?: fromset
, toset_expand
?: toset
,
391 if (result
== __GCONV_OK
)
394 result
= increment_counter (*handle
, *nsteps
);
399 /* The task is to find a sequence of transformations, backed by the
400 existing modules - whether builtin or dynamically loadable -,
401 starting at `fromset' (or `fromset_expand') and ending at `toset'
402 (or `toset_expand'), and with minimal cost.
404 For computer scientists, this is a shortest path search in the
405 graph where the nodes are all possible charsets and the edges are
406 the transformations listed in __gconv_modules_db.
408 For now we use a simple algorithm with quadratic runtime behaviour.
409 A breadth-first search, starting at `fromset' and `fromset_expand'.
410 The list starting at `first' contains all nodes that have been
411 visited up to now, in the order in which they have been visited --
412 excluding the goal nodes `toset' and `toset_expand' which get
413 managed in the list starting at `solution'.
414 `current' walks through the list starting at `first' and looks
415 which nodes are reachable from the current node, adding them to
416 the end of the list [`first' or `solution' respectively] (if
417 they are visited the first time) or updating them in place (if
418 they have have already been visited).
419 In each node of either list, cost_lo and cost_hi contain the
420 minimum cost over any paths found up to now, starting at `fromset'
421 or `fromset_expand', ending at that node. best_cost_lo and
422 best_cost_hi represent the minimum over the elements of the
425 if (fromset_expand
!= NULL
)
427 first
= NEW_STEP (fromset_expand
, 0, 0, NULL
, NULL
);
428 first
->next
= NEW_STEP (fromset
, 0, 0, NULL
, NULL
);
429 lastp
= &first
->next
->next
;
433 first
= NEW_STEP (fromset
, 0, 0, NULL
, NULL
);
434 lastp
= &first
->next
;
437 for (current
= first
; current
!= NULL
; current
= current
->next
)
439 /* Now match all the available module specifications against the
440 current charset name. If any of them matches check whether
441 we already have a derivation for this charset. If yes, use the
442 one with the lower costs. Otherwise add the new charset at the
445 The module database is organized in a tree form which allows
446 searching for prefixes. So we search for the first entry with a
447 matching prefix and any other matching entry can be found from
449 struct gconv_module
*node
;
451 /* Maybe it is not necessary anymore to look for a solution for
452 this entry since the cost is already as high (or higher) as
453 the cost for the best solution so far. */
454 if (current
->cost_hi
> best_cost_hi
455 || (current
->cost_hi
== best_cost_hi
456 && current
->cost_lo
>= best_cost_lo
))
459 node
= __gconv_modules_db
;
462 int cmpres
= strcmp (current
->result_set
, node
->from_string
);
465 /* Walk through the list of modules with this prefix and
466 try to match the name. */
467 struct gconv_module
*runp
;
469 /* Check all the modules with this prefix. */
473 const char *result_set
= (strcmp (runp
->to_string
, "-") == 0
474 ? (toset_expand
?: toset
)
476 int cost_hi
= runp
->cost_hi
+ current
->cost_hi
;
477 int cost_lo
= runp
->cost_lo
+ current
->cost_lo
;
478 struct derivation_step
*step
;
480 /* We managed to find a derivation. First see whether
481 we have reached one of the goal nodes. */
482 if (strcmp (result_set
, toset
) == 0
483 || (toset_expand
!= NULL
484 && strcmp (result_set
, toset_expand
) == 0))
486 /* Append to the `solution' list if there
487 is no entry with this name. */
488 for (step
= solution
; step
!= NULL
; step
= step
->next
)
489 if (strcmp (result_set
, step
->result_set
) == 0)
494 step
= NEW_STEP (result_set
,
497 step
->next
= solution
;
500 else if (step
->cost_hi
> cost_hi
501 || (step
->cost_hi
== cost_hi
502 && step
->cost_lo
> cost_lo
))
504 /* A better path was found for the node,
505 on the `solution' list. */
507 step
->last
= current
;
508 step
->cost_hi
= cost_hi
;
509 step
->cost_lo
= cost_lo
;
512 /* Update best_cost accordingly. */
513 if (cost_hi
< best_cost_hi
514 || (cost_hi
== best_cost_hi
515 && cost_lo
< best_cost_lo
))
517 best_cost_hi
= cost_hi
;
518 best_cost_lo
= cost_lo
;
521 else if (cost_hi
< best_cost_hi
522 || (cost_hi
== best_cost_hi
523 && cost_lo
< best_cost_lo
))
525 /* Append at the end of the `first' list if there
526 is no entry with this name. */
527 for (step
= first
; step
!= NULL
; step
= step
->next
)
528 if (strcmp (result_set
, step
->result_set
) == 0)
533 *lastp
= NEW_STEP (result_set
,
536 lastp
= &(*lastp
)->next
;
538 else if (step
->cost_hi
> cost_hi
539 || (step
->cost_hi
== cost_hi
540 && step
->cost_lo
> cost_lo
))
542 /* A better path was found for the node,
543 on the `first' list. */
545 step
->last
= current
;
547 /* Update the cost for all steps. */
548 for (step
= first
; step
!= NULL
;
550 /* But don't update the start nodes. */
551 if (step
->code
!= NULL
)
553 struct derivation_step
*back
;
556 hi
= step
->code
->cost_hi
;
557 lo
= step
->code
->cost_lo
;
559 for (back
= step
->last
; back
->code
!= NULL
;
562 hi
+= back
->code
->cost_hi
;
563 lo
+= back
->code
->cost_lo
;
570 /* Likewise for the nodes on the solution list.
571 Also update best_cost accordingly. */
572 for (step
= solution
; step
!= NULL
;
575 step
->cost_hi
= (step
->code
->cost_hi
576 + step
->last
->cost_hi
);
577 step
->cost_lo
= (step
->code
->cost_lo
578 + step
->last
->cost_lo
);
580 if (step
->cost_hi
< best_cost_hi
581 || (step
->cost_hi
== best_cost_hi
582 && step
->cost_lo
< best_cost_lo
))
584 best_cost_hi
= step
->cost_hi
;
585 best_cost_lo
= step
->cost_lo
;
593 while (runp
!= NULL
);
604 if (solution
!= NULL
)
606 /* We really found a way to do the transformation. */
608 /* Choose the best solution. This is easy because we know that
609 the solution list has at most length 2 (one for every possible
611 if (solution
->next
!= NULL
)
613 struct derivation_step
*solution2
= solution
->next
;
615 if (solution2
->cost_hi
< solution
->cost_hi
616 || (solution2
->cost_hi
== solution
->cost_hi
617 && solution2
->cost_lo
< solution
->cost_lo
))
618 solution
= solution2
;
621 /* Now build a data structure describing the transformation steps. */
622 result
= gen_steps (solution
, toset_expand
?: toset
,
623 fromset_expand
?: fromset
, handle
, nsteps
);
627 /* We haven't found a transformation. Clear the result values. */
632 /* Add result in any case to list of known derivations. */
633 add_derivation (fromset_expand
?: fromset
, toset_expand
?: toset
,
640 /* Control of initialization. */
641 __libc_once_define (static, once
);
645 do_lookup_alias (const char *name
)
647 struct gconv_alias key
;
648 struct gconv_alias
**found
;
650 key
.fromname
= (char *) name
;
651 found
= __tfind (&key
, &__gconv_alias_db
, __gconv_alias_compare
);
652 return found
!= NULL
? (*found
)->toname
: NULL
;
658 __gconv_compare_alias (const char *name1
, const char *name2
)
662 /* Ensure that the configuration data is read. */
663 __libc_once (once
, __gconv_read_conf
);
665 if (__gconv_compare_alias_cache (name1
, name2
, &result
) != 0)
666 result
= strcmp (do_lookup_alias (name1
) ?: name1
,
667 do_lookup_alias (name2
) ?: name2
);
675 __gconv_find_transform (const char *toset
, const char *fromset
,
676 struct __gconv_step
**handle
, size_t *nsteps
,
679 const char *fromset_expand
;
680 const char *toset_expand
;
683 /* Ensure that the configuration data is read. */
684 __libc_once (once
, __gconv_read_conf
);
686 /* Acquire the lock. */
687 __libc_lock_lock (lock
);
689 result
= __gconv_lookup_cache (toset
, fromset
, handle
, nsteps
, flags
);
690 if (result
!= __GCONV_NODB
)
692 /* We have a cache and could resolve the request, successful or not. */
693 __libc_lock_unlock (lock
);
697 /* If we don't have a module database return with an error. */
698 if (__gconv_modules_db
== NULL
)
700 __libc_lock_unlock (lock
);
701 return __GCONV_NOCONV
;
704 /* See whether the names are aliases. */
705 fromset_expand
= do_lookup_alias (fromset
);
706 toset_expand
= do_lookup_alias (toset
);
708 if (__builtin_expect (flags
& GCONV_AVOID_NOCONV
, 0)
709 /* We are not supposed to create a pseudo transformation (means
710 copying) when the input and output character set are the same. */
711 && (strcmp (toset
, fromset
) == 0
712 || (toset_expand
!= NULL
&& strcmp (toset_expand
, fromset
) == 0)
713 || (fromset_expand
!= NULL
714 && (strcmp (toset
, fromset_expand
) == 0
715 || (toset_expand
!= NULL
716 && strcmp (toset_expand
, fromset_expand
) == 0)))))
718 /* Both character sets are the same. */
719 __libc_lock_unlock (lock
);
720 return __GCONV_NOCONV
;
723 result
= find_derivation (toset
, toset_expand
, fromset
, fromset_expand
,
726 /* Release the lock. */
727 __libc_lock_unlock (lock
);
729 /* The following code is necessary since `find_derivation' will return
730 GCONV_OK even when no derivation was found but the same request
731 was processed before. I.e., negative results will also be cached. */
732 return (result
== __GCONV_OK
733 ? (*handle
== NULL
? __GCONV_NOCONV
: __GCONV_OK
)
738 /* Release the entries of the modules list. */
741 __gconv_close_transform (struct __gconv_step
*steps
, size_t nsteps
)
743 int result
= __GCONV_OK
;
746 /* Acquire the lock. */
747 __libc_lock_lock (lock
);
752 __gconv_release_step (&steps
[cnt
]);
755 /* If we use the cache we free a bit more since we don't keep any
756 transformation records around, they are cheap enough to
758 __gconv_release_cache (steps
, nsteps
);
760 /* Release the lock. */
761 __libc_lock_unlock (lock
);
767 /* Free the modules mentioned. */
770 free_modules_db (struct gconv_module
*node
)
772 if (node
->left
!= NULL
)
773 free_modules_db (node
->left
);
774 if (node
->right
!= NULL
)
775 free_modules_db (node
->right
);
778 struct gconv_module
*act
= node
;
780 if (act
->module_name
[0] == '/')
783 while (node
!= NULL
);
787 /* Free all resources if necessary. */
788 libc_freeres_fn (free_mem
)
790 if (__gconv_alias_db
!= NULL
)
791 __tdestroy (__gconv_alias_db
, free
);
793 if (__gconv_modules_db
!= NULL
)
794 free_modules_db (__gconv_modules_db
);
796 if (known_derivations
!= NULL
)
797 __tdestroy (known_derivations
, free_derivation
);