Skip to content

Commit

Permalink
Fix smart converter encoding guessing and add test case
Browse files Browse the repository at this point in the history
  • Loading branch information
Jesse van den Kieboom authored and nacho committed Jan 17, 2010
1 parent b6a28db commit 61a8c6f
Show file tree
Hide file tree
Showing 7 changed files with 366 additions and 88 deletions.
20 changes: 13 additions & 7 deletions gedit/gedit-document.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@

#include "gedit-prefs-manager-app.h"
#include "gedit-document.h"
#include "gedit-convert.h"
#include "gedit-debug.h"
#include "gedit-utils.h"
#include "gedit-language-manager.h"
Expand Down Expand Up @@ -87,8 +88,6 @@ static void gedit_document_save_real (GeditDocument *doc,
const gchar *uri,
const GeditEncoding *encoding,
GeditDocumentSaveFlags flags);
static void gedit_document_set_readonly (GeditDocument *doc,
gboolean readonly);
static void to_search_region_range (GeditDocument *doc,
GtkTextIter *start,
GtkTextIter *end);
Expand Down Expand Up @@ -1046,9 +1045,16 @@ set_readonly (GeditDocument *doc,
doc->priv->readonly = readonly;
}

static void
gedit_document_set_readonly (GeditDocument *doc,
gboolean readonly)
/**
* _gedit_document_set_readonly:
* @doc: a #GeditDocument
* @readonly: %TRUE to set the document as read-only
*
* If @readonly is %TRUE, sets @doc as read-only.
*/
void
_gedit_document_set_readonly (GeditDocument *doc,
gboolean readonly)
{
gedit_debug (DEBUG_DOCUMENT);

Expand Down Expand Up @@ -1119,7 +1125,7 @@ document_loader_loaded (GeditDocumentLoader *loader,
GeditDocument *doc)
{
/* load was successful */
if (error == NULL)
if (error == NULL || error->code == GEDIT_DOCUMENT_ERROR_CONVERSION_FALLBACK)
{
GtkTextIter iter;
GFileInfo *info;
Expand Down Expand Up @@ -1360,7 +1366,7 @@ document_saver_saving (GeditDocumentSaver *saver,

g_get_current_time (&doc->priv->time_of_last_save_or_load);

gedit_document_set_readonly (doc, FALSE);
_gedit_document_set_readonly (doc, FALSE);

gtk_text_buffer_set_modified (GTK_TEXT_BUFFER (doc),
FALSE);
Expand Down
3 changes: 3 additions & 0 deletions gedit/gedit-document.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,9 @@ void gedit_document_set_metadata (GeditDocument *doc,
/*
* Non exported functions
*/
void _gedit_document_set_readonly (GeditDocument *doc,
gboolean readonly);

glong _gedit_document_get_seconds_since_last_save_or_load
(GeditDocument *doc);

Expand Down
6 changes: 3 additions & 3 deletions gedit/gedit-gio-document-loader.c
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ remote_load_completed_or_failed (GeditGioDocumentLoader *gvloader, AsyncData *as

if (async)
async_data_free (async);

if (gvloader->priv->stream)
g_input_stream_close_async (G_INPUT_STREAM (gvloader->priv->stream),
G_PRIORITY_HIGH, NULL, NULL, NULL);
Expand Down Expand Up @@ -359,11 +359,11 @@ async_read_cb (GInputStream *stream,
if ((gedit_smart_charset_converter_get_num_fallbacks (gvloader->priv->converter) != 0) &&
gvloader->priv->error == NULL)
{
/* FIXME: Maybe check for some specific error ? */
g_set_error_literal (&gvloader->priv->error,
GEDIT_DOCUMENT_ERROR,
GEDIT_DOCUMENT_ERROR_CONVERSION_FALLBACK,
_("There was a problem blah blah")); /* FIXME */
"There was a conversion error and it was "
"needed to use a fallback char");
}

end_append_text_to_document (GEDIT_DOCUMENT_LOADER (gvloader));
Expand Down
84 changes: 66 additions & 18 deletions gedit/gedit-io-error-message-area.c
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,8 @@ create_option_menu (GtkWidget *message_area, GtkWidget *vbox)

static GtkWidget *
create_conversion_error_message_area (const gchar *primary_text,
const gchar *secondary_text)
const gchar *secondary_text,
gboolean edit_anyway)
{
GtkWidget *message_area;
GtkWidget *hbox_content;
Expand All @@ -558,21 +559,53 @@ create_conversion_error_message_area (const gchar *primary_text,
_("_Retry"),
GTK_STOCK_REDO,
GTK_RESPONSE_OK);
gedit_message_area_add_button (GEDIT_MESSAGE_AREA (message_area),
GTK_STOCK_CANCEL,
GTK_RESPONSE_CANCEL);

if (edit_anyway)
{
gedit_message_area_add_button (GEDIT_MESSAGE_AREA (message_area),
_("Edit Any_way"),
GTK_RESPONSE_YES);
gedit_message_area_add_button (GEDIT_MESSAGE_AREA (message_area),
_("D_on't Edit"),
GTK_RESPONSE_CANCEL);
}
else
{
gedit_message_area_add_button (GEDIT_MESSAGE_AREA (message_area),
GTK_STOCK_CANCEL,
GTK_RESPONSE_CANCEL);
}
#else
message_area = gtk_info_bar_new ();

info_bar_add_stock_button_with_text (GTK_INFO_BAR (message_area),
_("_Retry"),
GTK_STOCK_REDO,
GTK_RESPONSE_OK);

gtk_info_bar_add_button (GTK_INFO_BAR (message_area),
GTK_STOCK_CANCEL,
GTK_RESPONSE_CANCEL);
gtk_info_bar_set_message_type (GTK_INFO_BAR (message_area),
GTK_MESSAGE_ERROR);
if (edit_anyway)
{
gtk_info_bar_add_button (GTK_INFO_BAR (message_area),
/* Translators: the access key chosen for this string should be
different from other main menu access keys (Open, Edit, View...) */
_("Edit Any_way"),
GTK_RESPONSE_YES);
gtk_info_bar_add_button (GTK_INFO_BAR (message_area),
/* Translators: the access key chosen for this string should be
different from other main menu access keys (Open, Edit, View...) */
_("D_on't Edit"),
GTK_RESPONSE_CANCEL);
gtk_info_bar_set_message_type (GTK_INFO_BAR (message_area),
GTK_MESSAGE_WARNING);
}
else
{
gtk_info_bar_add_button (GTK_INFO_BAR (message_area),
GTK_STOCK_CANCEL,
GTK_RESPONSE_CANCEL);
gtk_info_bar_set_message_type (GTK_INFO_BAR (message_area),
GTK_MESSAGE_ERROR);
}
#endif

hbox_content = gtk_hbox_new (FALSE, 8);
Expand Down Expand Up @@ -628,6 +661,7 @@ gedit_conversion_error_while_loading_message_area_new (
gchar *uri_for_display;
gchar *temp_uri_for_display;
GtkWidget *message_area;
gboolean edit_anyway = FALSE;

g_return_val_if_fail (uri != NULL, NULL);
g_return_val_if_fail (error != NULL, NULL);
Expand All @@ -640,8 +674,8 @@ gedit_conversion_error_while_loading_message_area_new (
* though the dialog uses wrapped text, if the URI doesn't contain
* white space then the text-wrapping code is too stupid to wrap it.
*/
temp_uri_for_display = gedit_utils_str_middle_truncate (full_formatted_uri,
MAX_URI_IN_DIALOG_LENGTH);
temp_uri_for_display = gedit_utils_str_middle_truncate (full_formatted_uri,
MAX_URI_IN_DIALOG_LENGTH);
g_free (full_formatted_uri);

uri_for_display = g_markup_printf_escaped ("<i>%s</i>", temp_uri_for_display);
Expand All @@ -652,18 +686,29 @@ gedit_conversion_error_while_loading_message_area_new (
else
encoding_name = g_strdup ("UTF-8");

if (error->domain == GEDIT_CONVERT_ERROR)
if (error->domain == GEDIT_CONVERT_ERROR &&
error->code == GEDIT_CONVERT_ERROR_AUTO_DETECTION_FAILED)
{
g_return_val_if_fail (error->code == GEDIT_CONVERT_ERROR_AUTO_DETECTION_FAILED, NULL);

error_message = g_strdup_printf (_("Could not open the file %s."),
uri_for_display);
uri_for_display);
message_details = g_strconcat (_("gedit has not been able to detect "
"the character coding."), "\n",
_("Please check that you are not trying to open a binary file."), "\n",
_("Select a character coding from the menu and try again."), NULL);
}
else
else if (error->domain == GEDIT_DOCUMENT_ERROR &&
error->code == GEDIT_DOCUMENT_ERROR_CONVERSION_FALLBACK)
{
error_message = g_strdup_printf (_("There was a problem opening the file %s."),
uri_for_display);
message_details = g_strconcat (_("The file you opened has some invalid characters, "
"if you continue editing this file you could make this "
"document useless."), "\n",
_("You can also choose another character encoding and try again."),
NULL);
edit_anyway = TRUE;
}
else
{

error_message = g_strdup_printf (_("Could not open the file %s using the %s character coding."),
Expand All @@ -673,7 +718,9 @@ gedit_conversion_error_while_loading_message_area_new (
_("Select a different character coding from the menu and try again."), NULL);
}

message_area = create_conversion_error_message_area (error_message, message_details);
message_area = create_conversion_error_message_area (error_message,
message_details,
edit_anyway);

g_free (uri_for_display);
g_free (encoding_name);
Expand Down Expand Up @@ -726,7 +773,8 @@ gedit_conversion_error_while_saving_message_area_new (

message_area = create_conversion_error_message_area (
error_message,
message_details);
message_details,
FALSE);

g_free (uri_for_display);
g_free (encoding_name);
Expand Down
104 changes: 76 additions & 28 deletions gedit/gedit-smart-charset-converter.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,73 @@ get_encoding (GeditSmartCharsetConverter *smart)
return (const GeditEncoding *)smart->priv->current_encoding->data;
}

/*
 * try_convert:
 * @converter: the charset converter to test
 * @inbuf: sample of raw input bytes
 * @inbuf_size: number of bytes available in @inbuf
 *
 * Runs @inbuf through @converter into a scratch buffer to find out whether
 * the converter's source encoding is a plausible match for the data.
 *
 * Returns: %TRUE if the sample converted without error (a truncated
 * trailing multibyte sequence is tolerated) and the converted output is
 * valid UTF-8, %FALSE otherwise.
 */
static gboolean
try_convert (GCharsetConverter *converter,
             const void        *inbuf,
             gsize              inbuf_size)
{
	/* Arithmetic on void pointers is a GNU extension; use a byte pointer. */
	const gchar *in = inbuf;
	GError *err = NULL;
	GConverterResult res;
	gsize nread = 0;
	gsize nwritten = 0;
	gsize out_size;
	gchar *out;
	gboolean ret;

	/* Scratch output buffer, sized at four output bytes per input byte. */
	out_size = inbuf_size * 4;
	out = g_malloc (out_size);

	do
	{
		/* Reset the per-call counters: a converter that fails early is
		   not required to write them, and they are accumulated below. */
		gsize bytes_read = 0;
		gsize bytes_written = 0;

		res = g_converter_convert (G_CONVERTER (converter),
		                           in + nread,
		                           inbuf_size - nread,
		                           out + nwritten,
		                           out_size - nwritten,
		                           G_CONVERTER_INPUT_AT_END,
		                           &bytes_read,
		                           &bytes_written,
		                           &err);

		nread += bytes_read;
		nwritten += bytes_written;
	} while (res != G_CONVERTER_FINISHED && res != G_CONVERTER_ERROR && err == NULL);

	if (err != NULL)
	{
		/* FIXME We can get partial input while guessing the
		   encoding because we just take some amount of text
		   to guess from.
		   GConverter implementations report this condition in the
		   G_IO_ERROR domain; the error domain must be checked together
		   with the code, since codes are only unique per domain. */
		ret = g_error_matches (err, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT) ||
		      g_error_matches (err, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT);

		g_error_free (err);
	}
	else
	{
		ret = TRUE;
	}

	/* FIXME: Check the remainder? */
	if (ret && !g_utf8_validate (out, nwritten, NULL))
	{
		ret = FALSE;
	}

	g_free (out);

	return ret;
}

static GCharsetConverter *
guess_encoding (GeditSmartCharsetConverter *smart,
const void *inbuf,
Expand All @@ -136,18 +203,14 @@ guess_encoding (GeditSmartCharsetConverter *smart,
while (TRUE)
{
const GeditEncoding *enc;
gchar outbuf[inbuf_size];
GConverterResult ret;
gsize read, written;
GError *err = NULL;

if (conv != NULL)
{
g_object_unref (conv);
conv = NULL;
}

/* We get the first encoding we have in the list */
/* We get an encoding from the list */
enc = get_encoding (smart);

/* if it is NULL we didn't guess anything */
Expand All @@ -168,7 +231,7 @@ guess_encoding (GeditSmartCharsetConverter *smart,
break;
}

/* Check if the end is just less than one char */
/* Check if the end is less than one char */
remainder = inbuf_size - (end - (gchar *)inbuf);
if (remainder < 6)
{
Expand All @@ -189,35 +252,16 @@ guess_encoding (GeditSmartCharsetConverter *smart,
break;
}

ret = g_converter_convert (G_CONVERTER (conv),
inbuf,
inbuf_size,
outbuf,
inbuf_size,
0,
&read,
&written,
&err);

if (err != NULL)
{
/* FIXME: Is this ok or should we just skip it? */
if (err->code == G_CONVERT_ERROR_PARTIAL_INPUT)
{
g_error_free (err);
break;
}

g_error_free (err);
}
else
/* Try to convert */
if (try_convert (conv, inbuf, inbuf_size))
{
break;
}
}

if (conv != NULL)
{
g_converter_reset (G_CONVERTER (conv));
g_charset_converter_set_use_fallback (conv, TRUE);
}

Expand Down Expand Up @@ -343,5 +387,9 @@ gedit_smart_charset_converter_get_num_fallbacks (GeditSmartCharsetConverter *sma
{
g_return_val_if_fail (GEDIT_IS_SMART_CHARSET_CONVERTER (smart), FALSE);

if (smart->priv->charset_conv == NULL)
return FALSE;

return g_charset_converter_get_num_fallbacks (smart->priv->charset_conv) != 0;
}

Loading

0 comments on commit 61a8c6f

Please sign in to comment.