Open Chinese Convert  0.4.1
A project for conversion between Traditional and Simplified Chinese
 All Data Structures Files Functions Variables Groups Pages
opencc.c
Go to the documentation of this file.
1 
23 #include "common.h"
24 #include "config_reader.h"
25 #include "converter.h"
26 #include "dict_group.h"
27 #include "dict_chain.h"
28 #include "encoding.h"
29 #include "opencc.h"
30 
31 typedef struct {
32  DictChain* dict_chain;
33  Converter* converter;
34 } OpenccDesc;
35 
36 static opencc_error errnum = OPENCC_ERROR_VOID;
37 static int lib_initialized = 0;
38 
39 static void lib_initialize(void) {
40 #ifdef ENABLE_GETTEXT
41  bindtextdomain(PACKAGE_NAME, LOCALEDIR);
42 #endif /* ifdef ENABLE_GETTEXT */
43  lib_initialized = 1;
44 }
45 
46 size_t opencc_convert(opencc_t t_opencc,
47  ucs4_t** inbuf,
48  size_t* inbuf_left,
49  ucs4_t** outbuf,
50  size_t* outbuf_left) {
51  if (!lib_initialized) {
52  lib_initialize();
53  }
54  OpenccDesc* opencc = (OpenccDesc*)t_opencc;
55  size_t retval = converter_convert(opencc->converter,
56  inbuf,
57  inbuf_left,
58  outbuf,
59  outbuf_left);
60  if (retval == (size_t)-1) {
61  errnum = OPENCC_ERROR_CONVERTER;
62  }
63  return retval;
64 }
65 
66 char* opencc_convert_utf8(opencc_t t_opencc, const char* inbuf, size_t length) {
67  if (!lib_initialized) {
68  lib_initialize();
69  }
70  size_t actual_length = strlen(inbuf);
71  if ((length == (size_t)-1) || (length > actual_length)) {
72  length = actual_length;
73  }
74  ucs4_t* winbuf = utf8_to_ucs4(inbuf, length);
75  if (winbuf == (ucs4_t*)-1) {
76  /* Can not convert input UTF8 to UCS4 */
77  errnum = OPENCC_ERROR_ENCODING;
78  return (char*)-1;
79  }
80  /* Set up UTF8 buffer */
81  size_t outbuf_len = length;
82  size_t outsize = outbuf_len;
83  char* original_outbuf = (char*)malloc(sizeof(char) * (outbuf_len + 1));
84  char* outbuf = original_outbuf;
85  original_outbuf[0] = '\0';
86  /* Set conversion buffer */
87  size_t wbufsize = length + 64;
88  ucs4_t* woutbuf = (ucs4_t*)malloc(sizeof(ucs4_t) * (wbufsize + 1));
89  ucs4_t* pinbuf = winbuf;
90  ucs4_t* poutbuf = woutbuf;
91  size_t inbuf_left, outbuf_left;
92  inbuf_left = ucs4len(winbuf);
93  outbuf_left = wbufsize;
94  while (inbuf_left > 0) {
95  size_t retval = opencc_convert(t_opencc,
96  &pinbuf,
97  &inbuf_left,
98  &poutbuf,
99  &outbuf_left);
100  if (retval == (size_t)-1) {
101  free(outbuf);
102  free(winbuf);
103  free(woutbuf);
104  return (char*)-1;
105  }
106  *poutbuf = L'\0';
107  char* ubuff = ucs4_to_utf8(woutbuf, (size_t)-1);
108  if (ubuff == (char*)-1) {
109  free(outbuf);
110  free(winbuf);
111  free(woutbuf);
112  errnum = OPENCC_ERROR_ENCODING;
113  return (char*)-1;
114  }
115  size_t ubuff_len = strlen(ubuff);
116  while (ubuff_len > outsize) {
117  size_t outbuf_offset = outbuf - original_outbuf;
118  outsize += outbuf_len;
119  outbuf_len += outbuf_len;
120  original_outbuf =
121  (char*)realloc(original_outbuf, sizeof(char) * outbuf_len);
122  outbuf = original_outbuf + outbuf_offset;
123  }
124  strncpy(outbuf, ubuff, ubuff_len);
125  free(ubuff);
126  outbuf += ubuff_len;
127  *outbuf = '\0';
128  outbuf_left = wbufsize;
129  poutbuf = woutbuf;
130  }
131  free(winbuf);
132  free(woutbuf);
133  original_outbuf = (char*)realloc(original_outbuf,
134  sizeof(char) * (strlen(original_outbuf) + 1));
135  return original_outbuf;
136 }
137 
138 opencc_t opencc_open(const char* config_file) {
139  if (!lib_initialized) {
140  lib_initialize();
141  }
142  OpenccDesc* opencc;
143  opencc = (OpenccDesc*)malloc(sizeof(OpenccDesc));
144  opencc->dict_chain = NULL;
145  opencc->converter = converter_open();
146  converter_set_conversion_mode(opencc->converter, OPENCC_CONVERSION_FAST);
147  if (config_file == NULL) {
148  /* TODO load default */
149  assert(0);
150  } else {
151  /* Load config */
152  Config* config = config_open(config_file);
153  if (config == (Config*)-1) {
154  errnum = OPENCC_ERROR_CONFIG;
155  return (opencc_t)-1;
156  }
157  opencc->dict_chain = config_get_dict_chain(config);
158  converter_assign_dictionary(opencc->converter, opencc->dict_chain);
159  config_close(config);
160  }
161  return (opencc_t)opencc;
162 }
163 
164 int opencc_close(opencc_t t_opencc) {
165  if (!lib_initialized) {
166  lib_initialize();
167  }
168  OpenccDesc* opencc = (OpenccDesc*)t_opencc;
169  converter_close(opencc->converter);
170  if (opencc->dict_chain != NULL) {
171  dict_chain_delete(opencc->dict_chain);
172  }
173  free(opencc);
174  return 0;
175 }
176 
177 int opencc_dict_load(opencc_t t_opencc,
178  const char* dict_filename,
179  opencc_dictionary_type dict_type) {
180  if (!lib_initialized) {
181  lib_initialize();
182  }
183  OpenccDesc* opencc = (OpenccDesc*)t_opencc;
185  if (opencc->dict_chain == NULL) {
186  opencc->dict_chain = dict_chain_new(NULL);
187  DictGroup = dict_chain_add_group(opencc->dict_chain);
188  } else {
189  DictGroup = dict_chain_get_group(opencc->dict_chain, 0);
190  }
191  int retval = dict_group_load(DictGroup, dict_filename, dict_type);
192  if (retval == -1) {
193  errnum = OPENCC_ERROR_DICTLOAD;
194  return -1;
195  }
196  converter_assign_dictionary(opencc->converter, opencc->dict_chain);
197  return retval;
198 }
199 
200 void opencc_set_conversion_mode(opencc_t t_opencc,
201  opencc_conversion_mode conversion_mode) {
202  if (!lib_initialized) {
203  lib_initialize();
204  }
205  OpenccDesc* opencc = (OpenccDesc*)t_opencc;
206  converter_set_conversion_mode(opencc->converter, conversion_mode);
207 }
208 
209 opencc_error opencc_errno(void) {
210  if (!lib_initialized) {
211  lib_initialize();
212  }
213  return errnum;
214 }
215 
216 void opencc_perror(const char* spec) {
217  if (!lib_initialized) {
218  lib_initialize();
219  }
220  perr(spec);
221  perr("\n");
222  switch (errnum) {
223  case OPENCC_ERROR_VOID:
224  break;
225  case OPENCC_ERROR_DICTLOAD:
226  dictionary_perror(_("Dictionary loading error"));
227  break;
228  case OPENCC_ERROR_CONFIG:
229  config_perror(_("Configuration error"));
230  break;
231  case OPENCC_ERROR_CONVERTER:
232  converter_perror(_("Converter error"));
233  break;
234  case OPENCC_ERROR_ENCODING:
235  perr(_("Encoding error"));
236  break;
237  default:
238  perr(_("Unknown"));
239  }
240  perr("\n");
241 }