From 7f9fe4dcccbb21715c61b24dc842d5588890dff6 Mon Sep 17 00:00:00 2001 From: Didier Roche Date: Wed, 26 Jul 2023 11:14:28 +0200 Subject: [PATCH 1/4] Fix multi-line and backquotes string pot generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The extraction of gettext entries does not properly handle multi-line nor backquoted strings: * Multi-line strings are printed as is, instead of following the gettext spec: https://www.gnu.org/software/gettext/manual/html_node/Normalizing.html * Strings with backquotes are printed as “msgid `this is my string`”, * Multi-line strings with backquotes are formatted as multiple lines too, without closing quotes on each line. The loading is correct though; this only impacts the generated file. Those generated files are thus invalid and can’t be compiled to a mo file. --- cli/xgotext/parser/dir/golang.go | 6 +++-- cli/xgotext/parser/domain.go | 36 +++++++++++++++++++++++++-- cli/xgotext/parser/pkg-tree/golang.go | 7 +++--- 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/cli/xgotext/parser/dir/golang.go b/cli/xgotext/parser/dir/golang.go index f65d572..9e8e002 100644 --- a/cli/xgotext/parser/dir/golang.go +++ b/cli/xgotext/parser/dir/golang.go @@ -248,8 +248,9 @@ func (g *GoFile) parseGetter(def GetterDef, args []*ast.BasicLit, pos string) { return } + msgID, _ := strconv.Unquote(args[def.Id].Value) trans := parser.Translation{ - MsgId: args[def.Id].Value, + MsgId: msgID, SourceLocations: []string{pos}, } if def.Plural > 0 { @@ -258,7 +259,8 @@ func (g *GoFile) parseGetter(def GetterDef, args []*ast.BasicLit, pos string) { log.Printf("ERR: Unsupported call at %s (Plural not a string)", pos) return } - trans.MsgIdPlural = args[def.Plural].Value + msgIDPlural, _ := strconv.Unquote(args[def.Plural].Value) + trans.MsgIdPlural = msgIDPlural } if def.Context > 0 { // Context must be a string diff --git a/cli/xgotext/parser/domain.go b/cli/xgotext/parser/domain.go index 55ff4a5..8ff4842 100644 --- 
a/cli/xgotext/parser/domain.go +++ b/cli/xgotext/parser/domain.go @@ -37,13 +37,13 @@ func (t *Translation) Dump() string { data = append(data, "msgctxt "+t.Context) } - data = append(data, "msgid "+t.MsgId) + data = append(data, toMsgIDString("msgid", t.MsgId)...) if t.MsgIdPlural == "" { data = append(data, "msgstr \"\"") } else { + data = append(data, toMsgIDString("msgid_plural", t.MsgIdPlural)...) data = append(data, - "msgid_plural "+t.MsgIdPlural, "msgstr[0] \"\"", "msgstr[1] \"\"") } @@ -51,6 +51,38 @@ func (t *Translation) Dump() string { return strings.Join(data, "\n") } +// toMsgIDString returns the spec implementation of multi line support of po files by aligning msgid on it. +func toMsgIDString(prefix, msgID string) []string { + elems := strings.Split(msgID, "\n") + // Main case: single line. + if len(elems) == 1 { + return []string{fmt.Sprintf(`%s "%s"`, prefix, msgID)} + } + + // Only one line, but finishing with \n + if strings.Count(msgID, "\n") == 1 && strings.HasSuffix(msgID, "\n") { + return []string{fmt.Sprintf(`%s "%s\n"`, prefix, strings.TrimSuffix(msgID, "\n"))} + } + + // Skip last element for multiline which is an empty + var shouldEndWithEOL bool + if elems[len(elems)-1] == "" { + elems = elems[:len(elems)-1] + shouldEndWithEOL = true + } + data := []string{fmt.Sprintf(`%s ""`, prefix)} + for i, v := range elems { + l := fmt.Sprintf(`"%s\n"`, v) + // Last element without EOL + if i == len(elems)-1 && !shouldEndWithEOL { + l = fmt.Sprintf(`"%s"`, v) + } + data = append(data, l) + } + + return data +} + // TranslationMap contains a map of translations with the ID as key type TranslationMap map[string]*Translation diff --git a/cli/xgotext/parser/pkg-tree/golang.go b/cli/xgotext/parser/pkg-tree/golang.go index dd713ac..7c040da 100644 --- a/cli/xgotext/parser/pkg-tree/golang.go +++ b/cli/xgotext/parser/pkg-tree/golang.go @@ -17,7 +17,6 @@ import ( const gotextPkgPath = "github.com/leonelquinteros/gotext" - type GetterDef struct { Id int Plural 
int @@ -287,8 +286,9 @@ func (g *GoFile) parseGetter(def GetterDef, args []*ast.BasicLit, pos string) { return } + msgID, _ := strconv.Unquote(args[def.Id].Value) trans := parser.Translation{ - MsgId: args[def.Id].Value, + MsgId: msgID, SourceLocations: []string{pos}, } if def.Plural > 0 { @@ -297,7 +297,8 @@ func (g *GoFile) parseGetter(def GetterDef, args []*ast.BasicLit, pos string) { log.Printf("ERR: Unsupported call at %s (Plural not a string)", pos) return } - trans.MsgIdPlural = args[def.Plural].Value + msgIDPlural, _ := strconv.Unquote(args[def.Plural].Value) + trans.MsgIdPlural = msgIDPlural } if def.Context > 0 { // Context must be a string From b65bc5e2b19c1de30c1bc5897ee04e77b3014576 Mon Sep 17 00:00:00 2001 From: Didier Roche Date: Wed, 26 Jul 2023 11:23:25 +0200 Subject: [PATCH 2/4] Add more test cases and refresh tests for pot generation Ensure that the .po reflects latest translations (some were missing) Add more translations to reflect the new fixed use cases. --- cli/xgotext/fixtures/i18n/default.po | 72 +++++++++++++++++++--- cli/xgotext/fixtures/i18n/domain2.po | 9 ++- cli/xgotext/fixtures/i18n/translations.po | 9 +-- cli/xgotext/fixtures/main.go | 15 +++++ cli/xgotext/parser/pkg-tree/golang_test.go | 16 ++++- 5 files changed, 103 insertions(+), 18 deletions(-) diff --git a/cli/xgotext/fixtures/i18n/default.po b/cli/xgotext/fixtures/i18n/default.po index 344d94d..ca62aae 100644 --- a/cli/xgotext/fixtures/i18n/default.po +++ b/cli/xgotext/fixtures/i18n/default.po @@ -7,22 +7,80 @@ msgstr "" "Language: \n" "X-Generator: xgotext\n" - -#: fixtures/main.go:23 -#. gotext.Get +#: fixtures/main.go:35 +#: fixtures/main.go:37 msgid "My text on 'domain-name' domain" msgstr "" -#: fixtures/main.go:38 -#. l.GetN +#: fixtures/main.go:75 msgid "Singular" msgid_plural "Plural" msgstr[0] "" msgstr[1] "" -#: fixtures/main.go:40 -#. 
l.GetN +#: fixtures/main.go:77 msgid "SingularVar" msgid_plural "PluralVar" msgstr[0] "" msgstr[1] "" + +#: fixtures/main.go:44 +msgid "alias call" +msgstr "" + +#: fixtures/main.go:104 +msgid "inside dummy" +msgstr "" + +#: fixtures/pkg/pkg.go:15 +msgid "inside sub package" +msgstr "" + +#: fixtures/main.go:51 +msgid "" +"multi\n" +"line\n" +"string\n" +msgstr "" + +#: fixtures/main.go:54 +msgid "" +"multi\n" +"line\n" +"string\n" +"ending with\n" +"EOL\n" +msgstr "" + +#: fixtures/main.go:59 +msgid "" +"multline\n" +"ending with EOL\n" +msgstr "" + +#: fixtures/main.go:50 +msgid "raw string with\nmultiple\nEOL" +msgstr "" + +#: fixtures/main.go:48 +msgid "string ending with EOL\n" +msgstr "" + +#: fixtures/main.go:49 +msgid "" +"string with\n" +"multiple\n" +"EOL\n" +msgstr "" + +#: fixtures/main.go:47 +msgid "string with backquotes" +msgstr "" + +#: fixtures/main.go:91 +msgid "translate package" +msgstr "" + +#: fixtures/main.go:92 +msgid "translate sub package" +msgstr "" \ No newline at end of file diff --git a/cli/xgotext/fixtures/i18n/domain2.po b/cli/xgotext/fixtures/i18n/domain2.po index 0acd017..6954449 100644 --- a/cli/xgotext/fixtures/i18n/domain2.po +++ b/cli/xgotext/fixtures/i18n/domain2.po @@ -7,8 +7,11 @@ msgstr "" "Language: \n" "X-Generator: xgotext\n" - -#: fixtures/main.go:26 -#. gotext.GetD +#: fixtures/main.go:61 msgid "Another text on a different domain" msgstr "" + +#: fixtures/main.go:78 +msgctxt "ctx" +msgid "string" +msgstr "" \ No newline at end of file diff --git a/cli/xgotext/fixtures/i18n/translations.po b/cli/xgotext/fixtures/i18n/translations.po index 024ff43..4f05005 100644 --- a/cli/xgotext/fixtures/i18n/translations.po +++ b/cli/xgotext/fixtures/i18n/translations.po @@ -7,16 +7,13 @@ msgstr "" "Language: \n" "X-Generator: xgotext\n" - -#: fixtures/main.go:35 -#. l.GetD +#: fixtures/main.go:71 msgid "Translate this" msgstr "" -#: fixtures/main.go:43 -#. 
l.GetNDC +#: fixtures/main.go:79 msgctxt "NDC-CTX" msgid "ndc" msgid_plural "ndcs" msgstr[0] "" -msgstr[1] "" +msgstr[1] "" \ No newline at end of file diff --git a/cli/xgotext/fixtures/main.go b/cli/xgotext/fixtures/main.go index 29944d1..8dbd3d9 100644 --- a/cli/xgotext/fixtures/main.go +++ b/cli/xgotext/fixtures/main.go @@ -43,6 +43,21 @@ func main() { // same with alias package name fmt.Println(alias.Get("alias call")) + // Special strings + fmt.Println(gotext.Get(`string with backquotes`)) + fmt.Println(gotext.Get("string ending with EOL\n")) + fmt.Println(gotext.Get("string with\nmultiple\nEOL")) + fmt.Println(gotext.Get(`raw string with\nmultiple\nEOL`)) + fmt.Println(gotext.Get(`multi +line +string`)) + fmt.Println(gotext.Get(`multi +line +string +ending with +EOL`)) + fmt.Println(gotext.Get("multline\nending with EOL\n")) + // Translate text from a different domain without reconfigure fmt.Println(gotext.GetD("domain2", "Another text on a different domain")) diff --git a/cli/xgotext/parser/pkg-tree/golang_test.go b/cli/xgotext/parser/pkg-tree/golang_test.go index b79f686..84c8fc3 100644 --- a/cli/xgotext/parser/pkg-tree/golang_test.go +++ b/cli/xgotext/parser/pkg-tree/golang_test.go @@ -1,10 +1,11 @@ package pkg_tree import ( - "github.com/leonelquinteros/gotext/cli/xgotext/parser" "os" "path/filepath" "testing" + + "github.com/leonelquinteros/gotext/cli/xgotext/parser" ) func TestParsePkgTree(t *testing.T) { @@ -23,7 +24,18 @@ func TestParsePkgTree(t *testing.T) { t.Error(err) } - translations := []string{"\"inside sub package\"", "\"My text on 'domain-name' domain\"", "\"alias call\"", "\"Singular\"", "\"SingularVar\"", "\"translate package\"", "\"translate sub package\"", "\"inside dummy\""} + translations := []string{"inside sub package", "My text on 'domain-name' domain", "alias call", "Singular", "SingularVar", "translate package", "translate sub package", "inside dummy", + `string with backquotes`, "string ending with EOL\n", "string 
with\nmultiple\nEOL", `raw string with\nmultiple\nEOL`, `multi +line +string`, `multi +line +string +ending with +EOL`, "multline\nending with EOL\n", } if len(translations) != len(data.Domains[defaultDomain].Translations) { t.Error("translations count mismatch") From 67c19dca014acf336eba5f7c2d7620c1517a22c0 Mon Sep 17 00:00:00 2001 From: Didier Roche Date: Wed, 26 Jul 2023 13:12:27 +0200 Subject: [PATCH 3/4] Apply same multi-line logic when marshalling to text Ensure we follow the same rule when exporting to text format. This allows multi-line exports following the convention of single line, EOL at the end of the string and more. --- domain.go | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/domain.go b/domain.go index 114a72f..dfff56e 100644 --- a/domain.go +++ b/domain.go @@ -3,6 +3,7 @@ package gotext import ( "bytes" "encoding/gob" + "fmt" "regexp" "sort" "strconv" @@ -653,9 +654,39 @@ func (do *Domain) MarshalText() ([]byte, error) { } func EscapeSpecialCharacters(s string) string { - s = regexp.MustCompile(`([^\\])(")`).ReplaceAllString(s, "$1\\\"") // Escape non-escaped double quotation marks - s = strings.ReplaceAll(s, "\n", "\"\n\"") // Convert newlines into multi-line strings - return s + s = regexp.MustCompile(`([^\\])(")`).ReplaceAllString(s, "$1\\\"") // Escape non-escaped double quotation marks + + if strings.Count(s, "\n") == 0 { + return s + } + + // Handle EOL and multi-lines + // Only one line, but finishing with \n + if strings.Count(s, "\n") == 1 && strings.HasSuffix(s, "\n") { + return strings.ReplaceAll(s, "\n", "\\n") + } + + elems := strings.Split(s, "\n") + // Skip last element for multiline which is an empty + var shouldEndWithEOL bool + if elems[len(elems)-1] == "" { + elems = elems[:len(elems)-1] + shouldEndWithEOL = true + } + data := []string{(`"`)} + for i, v := range elems { + l := fmt.Sprintf(`"%s\n"`, v) + // Last element without EOL + if i == len(elems)-1 && 
!shouldEndWithEOL { + l = fmt.Sprintf(`"%s"`, v) + } + // Remove finale " to last element as the whole string will be quoted + if i == len(elems)-1 { + l = strings.TrimSuffix(l, `"`) + } + data = append(data, l) + } + return strings.Join(data, "\n") } // MarshalBinary implements encoding.BinaryMarshaler interface From 4d8518bd51b81191770d1d2e2efccdf1bb5a476d Mon Sep 17 00:00:00 2001 From: Didier Roche Date: Wed, 26 Jul 2023 13:16:54 +0200 Subject: [PATCH 4/4] Adapt multi-line test This is the most demanding use case: multi-line not ending with EOL. Adapt the expected case for it. --- domain_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/domain_test.go b/domain_test.go index c5ad58f..db9b737 100644 --- a/domain_test.go +++ b/domain_test.go @@ -134,9 +134,10 @@ func TestDomain_CheckExportFormatting(t *testing.T) { msgstr "" msgid "myid" -msgstr "test string" +msgstr "" +"test string\n" "with \"newline\""` - + if string(poBytes) != expectedOutput { t.Errorf("Exported PO format does not match. Received:\n\n%v\n\n\nExpected:\n\n%v", string(poBytes), expectedOutput) }