diff --git a/domain.go b/domain.go index b52fd8a..92d736d 100644 --- a/domain.go +++ b/domain.go @@ -1,10 +1,9 @@ package gotext import ( - "bufio" "bytes" "encoding/gob" - "net/textproto" + "sort" "strconv" "strings" "sync" @@ -17,7 +16,7 @@ import ( // Domain has all the common functions for dealing with a gettext domain // it's initialized with a GettextFile (which represents either a Po or Mo file) type Domain struct { - Headers textproto.MIMEHeader + Headers HeaderMap // Language header Language string @@ -26,6 +25,9 @@ type Domain struct { // Plural-Forms header PluralForms string + // Comments at the head of the PO, preserved for round-trip + headerComments []string + // Parsed Plural-Forms header values nplurals int plural string @@ -43,11 +45,43 @@ type Domain struct { // Parsing buffers trBuffer *Translation ctxBuffer string + refBuffer string +} + +// HeaderMap preserves MIMEHeader behaviour (multiple values per key), without the key canonicalisation +type HeaderMap map[string][]string + +func (m HeaderMap) Add(key, value string) { + m[key] = append(m[key], value) +} +func (m HeaderMap) Del(key string) { + delete(m, key) +} +func (m HeaderMap) Get(key string) string { + if m == nil { + return "" + } + v := m[key] + if len(v) == 0 { + return "" + } + return v[0] +} +func (m HeaderMap) Set(key, value string) { + m[key] = []string{value} +} +func (m HeaderMap) Values(key string) []string { + if m == nil { + return nil + } + return m[key] } func NewDomain() *Domain { domain := new(Domain) + domain.Headers = make(HeaderMap) + domain.headerComments = make([]string, 0) domain.translations = make(map[string]*Translation) domain.contexts = make(map[string]map[string]*Translation) domain.pluralTranslations = make(map[string]*Translation) @@ -73,28 +107,42 @@ func (do *Domain) pluralForm(n int) int { // parseHeaders retrieves data from previously parsed headers. it's called by both Mo and Po when parsing func (do *Domain) parseHeaders() { - // Make sure we end with 2 carriage returns. 
- empty := "" - if _, ok := do.translations[empty]; ok { - empty = do.translations[empty].Get() + raw := "" + if _, ok := do.translations[raw]; ok { + raw = do.translations[raw].Get() } - raw := empty + "\n\n" - // Read - reader := bufio.NewReader(strings.NewReader(raw)) - tp := textproto.NewReader(reader) + // textproto.ReadMIMEHeader() forces keys through CanonicalMIMEHeaderKey(); must read header manually to have one-to-one round-trip of keys + languageKey := "Language" + pluralFormsKey := "Plural-Forms" + do.Headers = make(HeaderMap) // start afresh, as ReadMIMEHeader() did: re-parsing must replace header values, not append to them - var err error + rawLines := strings.Split(raw, "\n") + for _, line := range rawLines { + if len(line) == 0 { + continue + } - do.Headers, err = tp.ReadMIMEHeader() - if err != nil { - return + colonIdx := strings.Index(line, ":") + if colonIdx < 0 { + continue + } + + key := line[:colonIdx] + if strings.EqualFold(key, languageKey) { + languageKey = key + } else if strings.EqualFold(key, pluralFormsKey) { + pluralFormsKey = key + } + + value := strings.TrimSpace(line[colonIdx+1:]) + do.Headers.Add(key, value) } // Get/save needed headers - do.Language = do.Headers.Get("Language") + do.Language = do.Headers.Get(languageKey) do.tag = language.Make(do.Language) - do.PluralForms = do.Headers.Get("Plural-Forms") + do.PluralForms = do.Headers.Get(pluralFormsKey) // Parse Plural-Forms formula if do.PluralForms == "" { @@ -126,6 +174,80 @@ func (do *Domain) parseHeaders() { } } +// DropStaleTranslations drops any translations stored that have not been +// Set*() since 'po' was initialised +func (do *Domain) DropStaleTranslations() { + do.trMutex.Lock() + do.pluralMutex.Lock() + defer do.trMutex.Unlock() + defer do.pluralMutex.Unlock() + + for name, ctx := range do.contexts { + for id, trans := range ctx { + if trans.IsStale() { + delete(ctx, id) + } + } + if len(ctx) == 0 { + delete(do.contexts, name) + } + } + + for id, trans := range do.translations { + if trans.IsStale() { + delete(do.translations, id) + } + } +} + +// Set source references 
for a given translation +func (do *Domain) SetRefs(str string, refs []string) { + do.trMutex.Lock() + do.pluralMutex.Lock() + defer do.trMutex.Unlock() + defer do.pluralMutex.Unlock() + + if trans, ok := do.translations[str]; ok { + trans.SetRefs(refs) + } else { + trans = NewTranslation() + trans.ID = str + trans.SetRefs(refs) + do.translations[str] = trans + } +} + +// GetRefs gets the source references for a given translation (nil when the translation is unknown) +func (do *Domain) GetRefs(str string) []string { + // Sync read + do.trMutex.RLock() + defer do.trMutex.RUnlock() + + if do.translations != nil { + if trans, ok := do.translations[str]; ok { + return trans.Refs + } + } + return nil +} + +// Set sets the translation of a given string, creating the entry (keyed by id) when it does not exist yet +func (do *Domain) Set(id, str string) { + do.trMutex.Lock() + do.pluralMutex.Lock() + defer do.trMutex.Unlock() + defer do.pluralMutex.Unlock() + + if trans, ok := do.translations[id]; ok { + trans.Set(str) + } else { + trans = NewTranslation() + trans.ID = id + trans.Set(str) + do.translations[id] = trans + } +} + func (do *Domain) Get(str string, vars ...interface{}) string { // Sync read do.trMutex.RLock() @@ -141,6 +263,27 @@ func (do *Domain) Get(str string, vars ...interface{}) string { return Printf(str, vars...) } +// SetN sets the (N)th plural form for the given string, creating the entry (keyed by id) when it does not exist yet +func (do *Domain) SetN(id, plural string, n int, str string) { + // Get plural form _before_ lock down + pluralForm := do.pluralForm(n) + + do.trMutex.Lock() + do.pluralMutex.Lock() + defer do.trMutex.Unlock() + defer do.pluralMutex.Unlock() + + if trans, ok := do.translations[id]; ok { + trans.SetN(pluralForm, str) + } else { + trans = NewTranslation() + trans.ID = id + trans.PluralID = plural + trans.SetN(pluralForm, str) + do.translations[id] = trans + } +} + // GetN retrieves the (N)th plural form of Translation for the given string. // Supports optional parameters (vars... interface{}) to be inserted on the formatted string using the fmt.Printf syntax. 
func (do *Domain) GetN(str, plural string, n int, vars ...interface{}) string { @@ -161,6 +304,32 @@ func (do *Domain) GetN(str, plural string, n int, vars ...interface{}) string { return Printf(plural, vars...) } +// SetC sets the translation for the given string in the given context +func (do *Domain) SetC(id, ctx, str string) { + do.trMutex.Lock() + do.pluralMutex.Lock() + defer do.trMutex.Unlock() + defer do.pluralMutex.Unlock() + + if context, ok := do.contexts[ctx]; ok { + if trans, hasTrans := context[id]; hasTrans { + trans.Set(str) + } else { + trans = NewTranslation() + trans.ID = id + trans.Set(str) + context[id] = trans + } + } else { + trans := NewTranslation() + trans.ID = id + trans.Set(str) + do.contexts[ctx] = map[string]*Translation{ + id: trans, + } + } +} + // GetC retrieves the corresponding Translation for a given string in the given context. // Supports optional parameters (vars... interface{}) to be inserted on the formatted string using the fmt.Printf syntax. func (do *Domain) GetC(str, ctx string, vars ...interface{}) string { @@ -181,6 +350,35 @@ func (do *Domain) GetC(str, ctx string, vars ...interface{}) string { return Printf(str, vars...) } +// SetNC sets the (N)th plural form for the given string in the given context +func (do *Domain) SetNC(id, plural, ctx string, n int, str string) { + // Get plural form _before_ lock down + pluralForm := do.pluralForm(n) + + do.trMutex.Lock() + do.pluralMutex.Lock() + defer do.trMutex.Unlock() + defer do.pluralMutex.Unlock() + + if context, ok := do.contexts[ctx]; ok { + if trans, hasTrans := context[id]; hasTrans { + trans.SetN(pluralForm, str) + } else { + trans = NewTranslation() + trans.ID = id + trans.PluralID = plural // as in SetN: without it MarshalText writes the entry as a singular msgstr + trans.SetN(pluralForm, str) + context[id] = trans + } + } else { + trans := NewTranslation() + trans.ID = id + trans.PluralID = plural + trans.SetN(pluralForm, str) + do.contexts[ctx] = map[string]*Translation{id: trans} + } +} + // GetNC retrieves the (N)th plural form of Translation for the given string in the given context. 
// Supports optional parameters (vars... interface{}) to be inserted on the formatted string using the fmt.Printf syntax. func (do *Domain) GetNC(str, plural string, n int, ctx string, vars ...interface{}) string { @@ -203,6 +401,196 @@ func (do *Domain) GetNC(str, plural string, n int, ctx string, vars ...interface return Printf(plural, vars...) } +// SourceReference pairs a translation with its context and the path and line of its first source reference; MarshalText sorts on it +type SourceReference struct { + path string + line int + context string + trans *Translation +} + +// extractPathAndLine splits a "path:line" reference at its first colon; line is 0 when there is no colon or the rest is not a number +func extractPathAndLine(ref string) (string, int) { + var path string + var line int + colonIdx := strings.IndexRune(ref, ':') + if colonIdx >= 0 { + path = ref[:colonIdx] + line, _ = strconv.Atoi(ref[colonIdx+1:]) + } else { + path = ref + line = 0 + } + return path, line +} + +// MarshalText implements encoding.TextMarshaler interface +// Assists round-trip of POT/PO content +func (do *Domain) MarshalText() ([]byte, error) { + var buf bytes.Buffer + if len(do.headerComments) > 0 { + buf.WriteString(strings.Join(do.headerComments, "\n")) + buf.WriteByte(byte('\n')) + } + buf.WriteString("msgid \"\"\nmsgstr \"\"") + + // Standard order consistent with xgettext; headers not listed here take position 8 (between mime-version and content-type) + headerOrder := map[string]int{ + "project-id-version": 0, + "report-msgid-bugs-to": 1, + "pot-creation-date": 2, + "po-revision-date": 3, + "last-translator": 4, + "language-team": 5, + "language": 6, + "mime-version": 7, + "content-type": 9, + "content-transfer-encoding": 10, + "plural-forms": 11, + } + + headerKeys := make([]string, 0, len(do.Headers)) + + for k := range do.Headers { + headerKeys = append(headerKeys, k) + } + + sort.Slice(headerKeys, func(i, j int) bool { + var iOrder int + var jOrder int + var ok bool + if iOrder, ok = headerOrder[strings.ToLower(headerKeys[i])]; !ok { + iOrder = 8 + } + + if jOrder, ok = headerOrder[strings.ToLower(headerKeys[j])]; !ok { + jOrder = 8 + } + + if iOrder < jOrder { + return true + } + if iOrder > jOrder { + return false + } + return headerKeys[i] < headerKeys[j] + }) + + for _, k := range headerKeys { + // 
Access Headers map directly so as not to canonicalise + v := do.Headers[k] + + for _, value := range v { + buf.WriteString("\n\"" + k + ": " + value + "\\n\"") + } + } + + // Just as with headers, output translations in consistent order (to minimise diffs between round-trips), with (first) source reference taking priority, followed by context and finally ID + references := make([]SourceReference, 0) + for name, ctx := range do.contexts { + for id, trans := range ctx { + if id == "" { + continue + } + if len(trans.Refs) > 0 { + path, line := extractPathAndLine(trans.Refs[0]) + references = append(references, SourceReference{ + path, + line, + name, + trans, + }) + } else { + references = append(references, SourceReference{ + "", + 0, + name, + trans, + }) + } + } + } + + for id, trans := range do.translations { + if id == "" { + continue + } + + if len(trans.Refs) > 0 { + path, line := extractPathAndLine(trans.Refs[0]) + references = append(references, SourceReference{ + path, + line, + "", + trans, + }) + } else { + references = append(references, SourceReference{ + "", + 0, + "", + trans, + }) + } + } + + sort.Slice(references, func(i, j int) bool { + if references[i].path < references[j].path { + return true + } + if references[i].path > references[j].path { + return false + } + if references[i].line < references[j].line { + return true + } + if references[i].line > references[j].line { + return false + } + + if references[i].context < references[j].context { + return true + } + if references[i].context > references[j].context { + return false + } + return references[i].trans.ID < references[j].trans.ID + }) + + for _, ref := range references { + trans := ref.trans + if len(trans.Refs) > 0 { + buf.WriteString("\n\n#: " + strings.Join(trans.Refs, " ")) + } else { + buf.WriteByte(byte('\n')) + } + + // NOTE(review): contexts, IDs and translations are written verbatim; embedded quotes, backslashes or newlines are not escaped here - confirm this matches what Parse accepts + if ref.context == "" { + buf.WriteString("\nmsgid \"" + trans.ID + "\"") + } else { + buf.WriteString("\nmsgctxt \"" + ref.context + "\"\nmsgid \"" + trans.ID + "\"") + 
} + + if trans.PluralID == "" { + buf.WriteString("\nmsgstr \"" + trans.Trs[0] + "\"") + } else { + buf.WriteString("\nmsgid_plural \"" + trans.PluralID + "\"") + // Map iteration order is random: emit the plural forms by ascending index so that the output is stable between runs + forms := make([]int, 0, len(trans.Trs)) + for i := range trans.Trs { + forms = append(forms, i) + } + sort.Ints(forms) + for _, i := range forms { + buf.WriteString("\nmsgstr[" + strconv.Itoa(i) + "] \"" + trans.Trs[i] + "\"") + } + } + } + + return buf.Bytes(), nil +} + // MarshalBinary implements encoding.BinaryMarshaler interface func (do *Domain) MarshalBinary() ([]byte, error) { obj := new(TranslatorEncoding) diff --git a/mo.go b/mo.go index 27a5a96..5a7f8e8 100644 --- a/mo.go +++ b/mo.go @@ -8,7 +8,6 @@ package gotext import ( "bytes" "encoding/binary" - "net/textproto" ) const ( @@ -49,7 +48,7 @@ Example: */ type Mo struct { //these three public members are for backwards compatibility. they are just set to the value in the domain - Headers textproto.MIMEHeader + Headers HeaderMap Language string PluralForms string domain *Domain diff --git a/plurals/compiler.go b/plurals/compiler.go index 8c85017..ee82205 100644 --- a/plurals/compiler.go +++ b/plurals/compiler.go @@ -359,6 +359,9 @@ func tokenize(s string) []string { Eg: (foo) -> true; (foo)(bar) -> false; */ + if len(s) == 0 { + return []string{} + } if s[0] == '(' && s[len(s)-1] == ')' { s = s[1 : len(s)-1] } diff --git a/po.go b/po.go index 85d478f..84baa44 100644 --- a/po.go +++ b/po.go @@ -6,7 +6,6 @@ package gotext import ( - "net/textproto" "strconv" "strings" ) @@ -37,7 +36,7 @@ Example: */ type Po struct { //these three public members are for backwards compatibility. 
they are just set to the value in the domain - Headers textproto.MIMEHeader + Headers HeaderMap Language string PluralForms string @@ -66,23 +65,50 @@ func (po *Po) GetDomain() *Domain { return po.domain } -//all of these functions are for convenience and aid in backwards compatibility +// DropStaleTranslations and the methods below are convenience wrappers that delegate to the underlying Domain +func (po *Po) DropStaleTranslations() { + po.domain.DropStaleTranslations() +} + +func (po *Po) SetRefs(str string, refs []string) { + po.domain.SetRefs(str, refs) +} +func (po *Po) GetRefs(str string) []string { + return po.domain.GetRefs(str) +} + +func (po *Po) Set(id, str string) { + po.domain.Set(id, str) +} func (po *Po) Get(str string, vars ...interface{}) string { return po.domain.Get(str, vars...) } +func (po *Po) SetN(id, plural string, n int, str string) { + po.domain.SetN(id, plural, n, str) +} func (po *Po) GetN(str, plural string, n int, vars ...interface{}) string { return po.domain.GetN(str, plural, n, vars...) } +func (po *Po) SetC(id, ctx, str string) { + po.domain.SetC(id, ctx, str) +} func (po *Po) GetC(str, ctx string, vars ...interface{}) string { return po.domain.GetC(str, ctx, vars...) } +func (po *Po) SetNC(id, plural, ctx string, n int, str string) { + po.domain.SetNC(id, plural, ctx, n, str) +} func (po *Po) GetNC(str, plural string, n int, ctx string, vars ...interface{}) string { return po.domain.GetNC(str, plural, n, ctx, vars...) 
} +func (po *Po) MarshalText() ([]byte, error) { + return po.domain.MarshalText() +} + func (po *Po) MarshalBinary() ([]byte, error) { return po.domain.MarshalBinary() } @@ -103,7 +129,7 @@ func (po *Po) ParseFile(f string) { // Parse loads the translations specified in the provided string (str) func (po *Po) Parse(buf []byte) { if po.domain == nil { - panic("po.domain must be set when calling Parse") + panic("NewPo() was not used to instantiate this object") } // Lock while parsing @@ -118,6 +144,7 @@ func (po *Po) Parse(buf []byte) { // Init buffer po.domain.trBuffer = NewTranslation() po.domain.ctxBuffer = "" + po.domain.refBuffer = "" state := head for _, l := range lines { @@ -126,6 +153,7 @@ func (po *Po) Parse(buf []byte) { // Skip invalid lines if !po.isValidLine(l) { + po.parseComment(l, state) continue } @@ -198,7 +226,28 @@ func (po *Po) saveBuffer() { } // Flush Translation buffer - po.domain.trBuffer = NewTranslation() + if po.domain.refBuffer == "" { + po.domain.trBuffer = NewTranslation() + } else { + po.domain.trBuffer = NewTranslationWithRefs(strings.Split(po.domain.refBuffer, " ")) + } +} + +// parseComment either preserves comments before the first "msgid", for later round-trip, +// or buffers the source references of a "#:" line; a later "#:" line replaces an earlier one. NOTE(review): the buffer looks never cleared after use - confirm entries without "#:" should inherit it. 
+func (po *Po) parseComment(l string, state parseState) { + if len(l) > 0 && l[0] == '#' { + if state == head { + po.domain.headerComments = append(po.domain.headerComments, l) + } else if len(l) > 1 { + switch l[1] { + case ':': + if len(l) > 2 { + po.domain.refBuffer = strings.TrimSpace(l[2:]) + } + } + } + } } // parseContext takes a line starting with "msgctxt", diff --git a/po_test.go b/po_test.go index 6cbc907..f29cbb9 100644 --- a/po_test.go +++ b/po_test.go @@ -589,3 +589,149 @@ func TestNewPoTranslatorRace(t *testing.T) { <-pc <-rc } + +func TestPoBinaryEncoding(t *testing.T) { + // Create po objects: po is the source, po2 receives the decoded copy + po := NewPo() + po2 := NewPo() + + // Parse file + po.ParseFile("fixtures/en_US/default.po") + + buff, err := po.MarshalBinary() + if err != nil { + t.Fatal(err) + } + + err = po2.UnmarshalBinary(buff) + if err != nil { + t.Fatal(err) + } + + // Test translations + tr := po2.Get("My text") + if tr != "Translated text" { + t.Errorf("Expected 'Translated text' but got '%s'", tr) + } + // Test translations + tr = po2.Get("language") + if tr != "en_US" { + t.Errorf("Expected 'en_US' but got '%s'", tr) + } +} + +func TestPoTextEncoding(t *testing.T) { + // Create po objects: po is the source, po2 receives the round-tripped copy + po := NewPo() + po2 := NewPo() + + // Parse file + po.ParseFile("fixtures/en_US/default.po") + + if _, ok := po.Headers["Pot-Creation-Date"]; ok { + t.Errorf("Expected non-canonicalised header, got canonicalised") + } else { + if _, ok = po.Headers["POT-Creation-Date"]; !ok { + t.Errorf("Expected non-canonicalised header, but it was missing") + } + } + + // Round-trip: marshal po to text and parse the result into po2 + buff, err := po.MarshalText() + if err != nil { + t.Fatal(err) + } + + po2.Parse(buff) + + for k, v := range po.Headers { + if v2, ok := po2.Headers[k]; ok { + for i, value := range v { + if value != v2[i] { + t.Errorf("TestPoTextEncoding: Header Difference for %s: %s vs %s", k, value, v2[i]) + } + } + } + } + + // Test translations + tr := po2.Get("My text") + if tr != "Translated text" { + t.Errorf("Expected 'Translated 
text' but got '%s'", tr) + } + + tr = po2.Get("language") + if tr != "en_US" { + t.Errorf("Expected 'en_US' but got '%s'", tr) + } + + tr = po2.Get("Some random") + if tr != "Some random translation" { + t.Errorf("Expected 'Some random translation' but got '%s'", tr) + } + + v := "Test" + tr = po.GetC("One with var: %s", "Ctx", v) + if tr != "This one is the singular in a Ctx context: Test" { + t.Errorf("Expected 'This one is the singular in a Ctx context: Test' but got '%s'", tr) + } + + tr = po.GetNC("One with var: %s", "Several with vars: %s", 17, "Ctx", v) + if tr != "This one is the plural in a Ctx context: Test" { + t.Errorf("Expected 'This one is the plural in a Ctx context: Test' but got '%s'", tr) + } + + // Another kind of round-trip: re-Set the entries to keep, then drop the stale rest + po.Set("My text", "Translated text") + po.Set("language", "en_US") + + // But remove 'the' from the Ctx translations + po.SetNC("One with var: %s", "Several with vars: %s", "Ctx", 1, "This one is singular in a Ctx context: %s") + po.SetNC("One with var: %s", "Several with vars: %s", "Ctx", 17, "This one is plural in a Ctx context: %s") + + po.DropStaleTranslations() + + buff, err = po.MarshalText() + if err != nil { + t.Fatal(err) + } + + po2 = NewPo() + po2.Parse(buff) + + for k, v := range po.Headers { + if v2, ok := po2.Headers[k]; ok { + for i, value := range v { + if value != v2[i] { + t.Errorf("Only translations should have been dropped, not headers") + } + } + } + } + + tr = po2.Get("My text") + if tr != "Translated text" { + t.Errorf("Expected 'Translated text' but got '%s'", tr) + } + tr = po2.Get("language") + if tr != "en_US" { + t.Errorf("Expected 'en_US' but got '%s'", tr) + } + + tr = po2.Get("Some random") + if tr == "Some random translation" || tr != "Some random" { + t.Errorf("Expected 'Some random' translation to be dropped; was present") + } + + // With 'the' removed? 
+ v = "Test" + tr = po.GetC("One with var: %s", "Ctx", v) + if tr != "This one is singular in a Ctx context: Test" { + t.Errorf("Expected 'This one is singular in a Ctx context: Test' but got '%s'", tr) + } + + tr = po.GetNC("One with var: %s", "Several with vars: %s", 17, "Ctx", v) + if tr != "This one is plural in a Ctx context: Test" { + t.Errorf("Expected 'This one is plural in a Ctx context: Test' but got '%s'", tr) + } +} diff --git a/translation.go b/translation.go index bc069d4..26c30cf 100644 --- a/translation.go +++ b/translation.go @@ -10,14 +10,41 @@ type Translation struct { ID string PluralID string Trs map[int]string + Refs []string + + dirty bool } // NewTranslation returns the Translation object and initialized it. func NewTranslation() *Translation { - tr := new(Translation) - tr.Trs = make(map[int]string) + return &Translation{ + Trs: make(map[int]string), + } +} - return tr +// NewTranslationWithRefs returns an initialized Translation carrying the given source references. +func NewTranslationWithRefs(refs []string) *Translation { + return &Translation{ + Trs: make(map[int]string), + Refs: refs, + } +} + +// IsStale reports whether the translation has not been changed through Set, SetN or SetRefs since it was created. +func (t *Translation) IsStale() bool { + return !t.dirty +} + +// SetRefs replaces the source references and marks the translation as not stale. +func (t *Translation) SetRefs(refs []string) { + t.Refs = refs + t.dirty = true +} + +// Set stores the singular translation (index 0) and marks the translation as not stale. +func (t *Translation) Set(str string) { + t.Trs[0] = str + t.dirty = true } // Get returns the string of the translation @@ -33,6 +60,12 @@ func (t *Translation) Get() string { return t.ID } +// SetN stores the translation for plural form index n and marks the translation as not stale. +func (t *Translation) SetN(n int, str string) { + t.Trs[n] = str + t.dirty = true +} + // GetN returns the string of the plural translation func (t *Translation) GetN(n int) string { // Look for Translation index diff --git a/translator.go b/translator.go index 69c211c..0affd42 100644 --- a/translator.go +++ b/translator.go @@ -8,7 +8,6 @@ package gotext import ( "errors" "io/ioutil" - "net/textproto" "os" ) @@ -31,7 +30,7 @@ type Translator interface { // TranslatorEncoding is used as intermediary storage to encode Translator objects to Gob. 
type TranslatorEncoding struct { // Headers storage - Headers textproto.MIMEHeader + Headers HeaderMap // Language header Language string