From 8335f115f25438fdb44abb4c238bc25c4bc9ec49 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Wed, 28 Oct 2020 22:52:06 +0100 Subject: [PATCH 01/54] new shiny create_model code that is more memory efficient --- csv.go | 12 +- extras/create_model_v2.py | 208 ++ extras/templates/filters.jinja2 | 21 + extras/templates/initColumn.template.jinja2 | 5 + extras/templates/itemFullColumn.jinja2 | 2 + extras/templates/model.template.jinja2 | 227 ++ extras/templates/registerFilters.jinja2 | 8 + extras/templates/shrinkColumn.jinja2 | 12 + extras/templates/shrinkVars.jinja2 | 5 + extras/templates/sortfunc.jinja2 | 4 + http_handlers.go | 23 +- main.go | 3 +- model.go | 2139 +++++++++++++++++-- operations.go | 371 +++- 14 files changed, 2728 insertions(+), 312 deletions(-) create mode 100644 extras/create_model_v2.py create mode 100644 extras/templates/filters.jinja2 create mode 100644 extras/templates/initColumn.template.jinja2 create mode 100644 extras/templates/itemFullColumn.jinja2 create mode 100644 extras/templates/model.template.jinja2 create mode 100644 extras/templates/registerFilters.jinja2 create mode 100644 extras/templates/shrinkColumn.jinja2 create mode 100644 extras/templates/shrinkVars.jinja2 create mode 100644 extras/templates/sortfunc.jinja2 diff --git a/csv.go b/csv.go index f748c4b..431014c 100644 --- a/csv.go +++ b/csv.go @@ -62,8 +62,8 @@ func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, items := Items{} for { - item := Item{} - columns := item.Columns() + itemFull := ItemFull{} + columns := itemFull.Columns() cols := make([]interface{}, len(columns)) record, err := reader.Read() @@ -98,7 +98,7 @@ func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, // marschall it to bytes b, _ := json.Marshal(itemMap) // fill the new Item instance with values - if err := json.Unmarshal([]byte(b), &item); err != nil { + if err := json.Unmarshal([]byte(b), &itemFull); err != nil { line := strings.Join(record, 
delimiter) failed++ @@ -115,12 +115,14 @@ func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, itemChan <- items items = Items{} } - items = append(items, &item) + smallItem := itemFull.Shrink() + items = append(items, &smallItem) success++ } // add leftover items itemChan <- items + items = nil return nil, success, failed } @@ -179,7 +181,7 @@ func importCSV(filename string, itemChan ItemsChannel, return fmt.Errorf("line %d: %s", lineNumber, err) } - fmt.Printf("%d rows imported", success) + fmt.Printf("%d rows imported\n", success) if ignoreErrors && failed > 0 { fmt.Printf("%d rows could not be imported and have been written to stderr.", failed) diff --git a/extras/create_model_v2.py b/extras/create_model_v2.py new file mode 100644 index 0000000..a070240 --- /dev/null +++ b/extras/create_model_v2.py @@ -0,0 +1,208 @@ +# -*- coding: utf-8 -*- +""" +Load first rows from csv, ask some questions +and generate a model.go to jumpstart +your project for the given csv file. + +Much more memory efficient than v1 because repeated +values are now stored in a map and each individual item +only stores a uint16 reference. + +python create_model_v2.py -f your.csv +""" + +import csv +import sys + +from re import sub +from jinja2 import Environment, FileSystemLoader + +if '-f' in sys.argv: + filename = str(sys.argv[sys.argv.index('-f')+1]) +else: + filename = "items.csv" + +with open(filename) as f: + reader = csv.DictReader(f) + row = dict(next(reader)) + +env = Environment( + loader=FileSystemLoader('./templates'), +) + +# keep track of all column names; the *_org lists hold the original names +allcolumns = [] +repeated = [] +repeated_org = [] +unique = [] +unique_org = [] + + +def gocamelCase(string): + """convert string to upper CamelCase + + woning_type -> WoningType + """ + string = sub(r"(_|-)+", " ", string).title().replace(" ", "") + return string + + +# ask some questions about columns. 
+# for every column decide: repeated value (uint16 lookup) or unique string
+for index, k in enumerate(row.keys()):
+    kc = gocamelCase(k)
+
+    while True:
+        # keep asking until we get y or n; an empty answer means yes
+        q1 = "a repeated value? has less then (2^16=65536) values? Y/n?"
+        yesno = input(f"idx:{index} is {k} {q1}").strip().lower()  # noqa
+        if yesno == '':
+            yesno = 'y'
+        if yesno not in ['y', 'n']:
+            continue
+        break
+
+    if yesno == 'y':
+        repeated.append(kc)
+        repeated_org.append(k)
+    else:
+        unique.append(kc)
+        unique_org.append(k)
+
+    allcolumns.append(kc)
+
+
+# ask for an index column
+while True:
+    # only 0..len(allcolumns)-1 is valid; negatives would index from the end
+    index = input(f"which column is idx? 0 - {len(allcolumns) - 1} ")
+    try:
+        index = int(index)
+        if 0 <= index < len(allcolumns):
+            break
+    except ValueError:
+        pass
+    print('try again..')
+
+# setup initial data structs for each repeated column
+initRepeatColumns = []
+initColumntemplate = env.get_template('initColumn.template.jinja2')
+
+for c in repeated:
+    initRepeatColumns.append(initColumntemplate.render(columnname=c))
+
+# create ItemFull struct fields
+columnsItemFull = []
+jsonColumn = env.get_template('itemFullColumn.jinja2')
+for c1, c2 in zip(allcolumns, row.keys()):
+    onerow = jsonColumn.render(c1=c1, c2=c2)
+    columnsItemFull.append(onerow)
+
+# create Item struct fields
+columnsItem = []
+for c1, c2 in zip(allcolumns, row.keys()):
+    onerow = f"\t{c1} string\n"
+    if c1 in repeated:
+        onerow = f"\t{c1} uint16\n"
+    columnsItem.append(onerow)
+
+
+# create Shrink code for repeated fields
+# where we map uint16 to a string value.
+shrinkVars = []
+shrinkItems = []
+shrinkvartemplate = env.get_template('shrinkVars.jinja2')
+shrinktemplate = env.get_template('shrinkColumn.jinja2')
+for c in repeated:
+    shrinkVars.append(shrinkvartemplate.render(column=c))
+    shrinkItems.append(shrinktemplate.render(column=c))
+
+
+# create the actual shrinked/expand Item fields.
+shrinkItemFields = [] +expandItemFields = [] + +for c in allcolumns: + if c in repeated: + # string to unint + shrinkItemFields.append(f"\t\t{c}IdxMap[i.{c}],\n") + # unint back to string + expandItemFields.append(f"\t\t{c}[i.{c}],\n") + else: + shrinkItemFields.append(f"\t\ti.{c},\n") + expandItemFields.append(f"\t\ti.{c},\n") + + +originalColumns = [] +for c in row.keys(): + originalColumns.append(f'\t\t"{c}",\n') + +# create column filters. +# match, startswith, contains etc + +columnFilters = [] +filtertemplate = env.get_template("filters.jinja2") + +for c in allcolumns: + lookup = f"i.{c}" + if c in repeated: + lookup = f"{c}[i.{c}]" + + txt = filtertemplate.render(column=c, lookup=lookup) + columnFilters.append(txt) + +registerFilters = [] +rtempl = env.get_template('registerFilters.jinja2') +# register filters +for c, co in zip(allcolumns, row.keys()): + txt = rtempl.render(co=co, column=c) + registerFilters.append(txt) + + +sortColumns = [] +sortTemplate = env.get_template('sortfunc.jinja2') + +# create sort functions +for co, c in zip(row.keys(), allcolumns): + + c1 = f"items[i].{c} < items[j].{c}" + c2 = f"items[i].{c} > items[j].{c}" + + if c in repeated: + c1 = f"{c}[items[i].{c}] < {c}[items[j].{c}]" + c2 = f"{c}[items[i].{c}] > {c}[items[j].{c}]" + + txt = sortTemplate.render(co=co, c1=c1, c2=c2) + sortColumns.append(txt) + + +csv_columns = [] +for c in row.keys(): + csv_columns.append(f'\t"{c}",\n') + + +# Finally render the model.go template +modeltemplate = env.get_template('model.template.jinja2') + +output = modeltemplate.render( + initRepeatColumns=''.join(initRepeatColumns), + columnsItemFull=''.join(columnsItemFull), + columnsItem=''.join(columnsItem), + shrinkVars=''.join(shrinkVars), + shrinkItems=''.join(shrinkItems), + shrinkItemFields=''.join(shrinkItemFields), + expandItemFields=''.join(expandItemFields), + csv_columns=''.join(csv_columns), + originalColumns=''.join(originalColumns), + columnFilters=''.join(columnFilters), + 
registerFilters=''.join(registerFilters), + sortColumns=''.join(sortColumns), + indexcolumn=allcolumns[index] +) + +f = open('model.go', 'w') +f.write(output) +f.close() + +print('saved in model.go') +print('!!NOTE!! edit the default search filter') diff --git a/extras/templates/filters.jinja2 b/extras/templates/filters.jinja2 new file mode 100644 index 0000000..a63578a --- /dev/null +++ b/extras/templates/filters.jinja2 @@ -0,0 +1,21 @@ + +// contain filter {{column}} +func Filter{{column}}Contains(i *Item, s string) bool { + return strings.Contains({{lookup}}, s) +} + + +// startswith filter {{column}} +func Filter{{column}}StartsWith(i *Item, s string) bool { + return strings.HasPrefix({{lookup}}, s) +} + +// match filters {{column}} +func Filter{{column}}Match(i *Item, s string) bool { + return {{lookup}} == s +} + +// getter {{column}} +func Getters{{column}}(i *Item) string { + return {{lookup}} +} diff --git a/extras/templates/initColumn.template.jinja2 b/extras/templates/initColumn.template.jinja2 new file mode 100644 index 0000000..f9a4052 --- /dev/null +++ b/extras/templates/initColumn.template.jinja2 @@ -0,0 +1,5 @@ + + {{columnname}}Tracker = 0 + {{columnname}}IdxMap = make(fieldIdxMap) + {{columnname}} = make(fieldMapIdx) + diff --git a/extras/templates/itemFullColumn.jinja2 b/extras/templates/itemFullColumn.jinja2 new file mode 100644 index 0000000..2113443 --- /dev/null +++ b/extras/templates/itemFullColumn.jinja2 @@ -0,0 +1,2 @@ + {{c1}} string `json:"{{c2}}"` + diff --git a/extras/templates/model.template.jinja2 b/extras/templates/model.template.jinja2 new file mode 100644 index 0000000..8e75ea8 --- /dev/null +++ b/extras/templates/model.template.jinja2 @@ -0,0 +1,227 @@ +package main + +import ( + "sort" + "strconv" + "strings" + "sync" +) + +type fieldIdxMap map[string]uint16 +type fieldMapIdx map[uint16]string +type fieldItemmap map[uint16][]*Item + +// Column maps. 
+// Store for each non distinct/repeated column +// unit16 -> string map and +// string -> unit16 map +// track count of distinct values + +{{shrinkVars}} + +/* +var {columnname}Tracker uint16 +var {columnname}IdxMap fieldIdxMap +var {columnname} fieldMapIdx +var {columnname}Items fieldItemmap +*/ + +var lock = sync.RWMutex{} + +func init() { + + {{initRepeatColumns}} + + /* + labelscoredefinitiefTracker = 0 + labelscoredefinitiefIdxMap = make(fieldIdxMap) + labelscoredefinitief = make(fieldMapIdx) + */ +} + +{{itemStructs}} + + +type ItemFull struct { + +{{columnsItemFull}} + +} + +type Item struct { + +{{columnsItem}} + +} + +func (i Item) Columns() []string { + + return []string{ + {{csv_columns}} + } +} + +// Shrink create smaller Item using uint16 +func (i ItemFull) Shrink() Item { + + lock.Lock() + defer lock.Unlock() + +{{shrinkItems}} + + return Item{ + +{{shrinkItemFields}} + + } +} + +func (i Item) Serialize() ItemFull { + + lock.RLock() + defer lock.RUnlock() + + return ItemFull{ + +{{expandItemFields}} + + } +} + +func (i ItemFull) Columns() []string { + return []string{ + +{{originalColumns}} + + } +} + +func (i Item) Row() []string { + + lock.RLock() + defer lock.RUnlock() + + return []string{ + +{{expandItemFields}} + + } +} + +func (i Item) GetIndex()string{ + return Getters{{indexcolumn}}(&i) +} + +{{columnFilters}} + +/* +// contain filters +func FilterEkeyContains(i *Item, s string) bool { + return strings.Contains(i.Ekey, s) +} + + +// startswith filters +func FilterEkeyStartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.Ekey, s) +} + + +// match filters +func FilterEkeyMatch(i *Item, s string) bool { + return i.Ekey == s +} + +// getters +func GettersEkey(i *Item) string { + return i.Ekey +} +*/ + +// reduce functions +func reduceCount(items Items) map[string]string { + result := make(map[string]string) + result["count"] = strconv.Itoa(len(items)) + return result +} + +type GroupedOperations struct { + Funcs registerFuncType + 
GroupBy registerGroupByFunc + Getters registerGettersMap + Reduce registerReduce +} + +var Operations GroupedOperations + +var RegisterFuncMap registerFuncType +var RegisterGroupBy registerGroupByFunc +var RegisterGetters registerGettersMap +var RegisterReduce registerReduce + +func init() { + + RegisterFuncMap = make(registerFuncType) + RegisterGroupBy = make(registerGroupByFunc) + RegisterGetters = make(registerGettersMap) + RegisterReduce = make(registerReduce) + + // register search filter. + //RegisterFuncMap["search"] = 'EDITYOURSELF' + + // register filters + +{{registerFilters}} + + /* + RegisterFuncMap["match-ekey"] = FilterEkeyMatch + RegisterFuncMap["contains-ekey"] = FilterEkeyContains + // register startswith filters + RegisterFuncMap["startswith-ekey"] = FilterEkeyStartsWith + // register getters + RegisterGetters["ekey"] = GettersEkey + // register groupby + RegisterGroupBy["ekey"] = GettersEkey + + // register reduce functions + RegisterReduce["count"] = reduceCount + */ +} + +type sortLookup map[string]func(int, int) bool + +func createSort(items Items) sortLookup { + + sortFuncs := sortLookup{ + + {{sortColumns}} + + /* + "ekey": func(i, j int) bool { return items[i].Ekey < items[j].Ekey }, + "-ekey": func(i, j int) bool { return items[i].Ekey > items[j].Ekey }, + */ + } + return sortFuncs +} + + +func sortBy(items Items, sortingL []string) (Items, []string) { + + lock.Lock() + defer lock.Unlock() + + sortFuncs := createSort(items) + + for _, sortFuncName := range sortingL { + sortFunc := sortFuncs[sortFuncName] + sort.Slice(items, sortFunc) + } + + // TODO must be nicer way + keys := []string{} + for key := range sortFuncs { + keys = append(keys, key) + } + + return items, keys +} diff --git a/extras/templates/registerFilters.jinja2 b/extras/templates/registerFilters.jinja2 new file mode 100644 index 0000000..299c12c --- /dev/null +++ b/extras/templates/registerFilters.jinja2 @@ -0,0 +1,8 @@ + + //register filters for {{column}} + 
RegisterFuncMap["match-{{co}}"] = Filter{{column}}Match + RegisterFuncMap["contains-{{co}}"] = Filter{{column}}Contains + RegisterFuncMap["startswith-{{co}}"] = Filter{{column}}StartsWith + RegisterGetters["{{co}}"] = Getters{{column}} + RegisterGroupBy["{{co}}"] = Getters{{column}} + diff --git a/extras/templates/shrinkColumn.jinja2 b/extras/templates/shrinkColumn.jinja2 new file mode 100644 index 0000000..a63bcb0 --- /dev/null +++ b/extras/templates/shrinkColumn.jinja2 @@ -0,0 +1,12 @@ + + //check if column value is already present + //else store new key + if _, ok := {{column}}IdxMap[i.{{column}}]; !ok { + // store {{column}} in map at current index of tracker + {{column}}[{{column}}Tracker] = i.{{column}} + // store key - idx + {{column}}IdxMap[i.{{column}}] = {{column}}Tracker + // increase tracker + {{column}}Tracker += 1 + } + diff --git a/extras/templates/shrinkVars.jinja2 b/extras/templates/shrinkVars.jinja2 new file mode 100644 index 0000000..bdc61fe --- /dev/null +++ b/extras/templates/shrinkVars.jinja2 @@ -0,0 +1,5 @@ + +var {{column}}Tracker uint16 +var {{column}}IdxMap fieldIdxMap +var {{column}} fieldMapIdx + diff --git a/extras/templates/sortfunc.jinja2 b/extras/templates/sortfunc.jinja2 new file mode 100644 index 0000000..3773c7d --- /dev/null +++ b/extras/templates/sortfunc.jinja2 @@ -0,0 +1,4 @@ + + "{{co}}": func(i, j int) bool { return {{c1}} }, + "-{{co}}": func(i, j int) bool { return {{c2}} }, + diff --git a/http_handlers.go b/http_handlers.go index 26a1ede..5ccef53 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -334,20 +334,29 @@ type Meta struct { } type searchResponse struct { - Count int `json:"count"` - Data Items `json:"data"` - MMeta *Meta `json:"meta"` + Count int `json:"count"` + Data ItemsFull `json:"data"` + MMeta *Meta `json:"meta"` } func makeResp(items Items) searchResponse { + + itemsfull := make(ItemsFull, 0, len(items)) + + for _, oneitem := range items { + orgItem := oneitem.Serialize() + itemsfull = 
append(itemsfull, &orgItem) + } + fields := []ShowItem{} - for _, column := range items[0].Columns() { + columns := ItemFull{}.Columns() + for _, column := range columns { fields = append(fields, ShowItem{IsShow: true, Name: column, Label: column}) } return searchResponse{ Count: len(items), - Data: items, + Data: itemsfull, MMeta: &Meta{Fields: fields, View: "table"}, } } @@ -422,6 +431,7 @@ func contextTypeAheadRest(JWTConig jwtConfig, itemChan ItemsChannel, operations column = column[:len(column)-1] } if _, ok := operations.Getters[column]; !ok { + w.Write([]byte("500 wrong column name")) w.WriteHeader(404) w.Write([]byte("column is not found")) return @@ -442,8 +452,8 @@ func contextTypeAheadRest(JWTConig jwtConfig, itemChan ItemsChannel, operations } w.WriteHeader(http.StatusOK) - json.NewEncoder(w).Encode(results) + results = nil } } @@ -513,6 +523,5 @@ func helpRest(w http.ResponseWriter, r *http.Request) { fmt.Sprintf("typeahead use the name of the column in this case IP: http://%s/typeahead/ip/?starts-with=127&limit=15", host), } w.WriteHeader(http.StatusOK) - json.NewEncoder(w).Encode(response) } diff --git a/main.go b/main.go index 9e2c88c..0aed0a8 100644 --- a/main.go +++ b/main.go @@ -20,6 +20,7 @@ type registerFormatMap map[string]formatRespFunc //Items as Example type Items []*Item +type ItemsFull []*ItemFull type ItemsGroupedBy map[string]Items type ItemsChannel chan Items @@ -58,7 +59,7 @@ func init() { func loadcsv(itemChan ItemsChannel) { log.Print("loading given csv") err := importCSV(SETTINGS.Get("csv"), itemChan, - true, true, + false, true, SETTINGS.Get("delimiter"), SETTINGS.Get("null-delimiter")) if err != nil { diff --git a/model.go b/model.go index 7d2b556..e2e53d2 100644 --- a/model.go +++ b/model.go @@ -1,184 +1,1482 @@ +/* + model.go define the 'items' to store. + All columns with getters and setters are defined here. 
+ + ItemIn, represents rows from the input data + Item, the compact item stored in memory + ItemOut, defines how and which fields are exported out + of the API. It is possible to ignore input columns + + Repeated values are stored in maps with int numbers + as keys. Optionally bitarrays are created for repeated + column values to do fast bit-wise filtering. + + An S2 geo index is created for lat, lon values. + + Unique values are stored as-is. + + The generated code leaves room to create custom + index functions yourself to create an API with an + < 1 ms response time for your specific needs. + + This codebase solves: I need to have an API on this + tabular dataset fast! +*/ + package main -import ( - "sort" - "strconv" - "strings" -) +import ( + "encoding/json" + "errors" + "log" + "sort" + "strconv" + "strings" + "sync" + + "github.com/Workiva/go-datastructures/bitarray" +) + +type registerGroupByFunc map[string]func(*Item) string +type registerGettersMap map[string]func(*Item) string +type registerReduce map[string]func(Items) map[string]string + +type registerBitArray map[string]func(s string) (bitarray.BitArray, error) + +type fieldIdxMap map[string]uint16 +type fieldMapIdx map[uint16]string +type fieldItemsMap map[uint16]bitarray.BitArray + +// Column maps. 
+// Store for each non distinct/repeated column +// unit16 -> string map and +// string -> unit16 map +// track count of distinct values + +var WoningTypeTracker uint16 +var WoningTypeIdxMap fieldIdxMap +var WoningType fieldMapIdx + +var WoningTypeItems fieldItemsMap + +var LabelscoreVoorlopigTracker uint16 +var LabelscoreVoorlopigIdxMap fieldIdxMap +var LabelscoreVoorlopig fieldMapIdx + +var LabelscoreVoorlopigItems fieldItemsMap + +var LabelscoreDefinitiefTracker uint16 +var LabelscoreDefinitiefIdxMap fieldIdxMap +var LabelscoreDefinitief fieldMapIdx + +var LabelscoreDefinitiefItems fieldItemsMap + +var GemeentecodeTracker uint16 +var GemeentecodeIdxMap fieldIdxMap +var Gemeentecode fieldMapIdx + +var GemeentecodeItems fieldItemsMap + +var GemeentenaamTracker uint16 +var GemeentenaamIdxMap fieldIdxMap +var Gemeentenaam fieldMapIdx + +var BuurtcodeTracker uint16 +var BuurtcodeIdxMap fieldIdxMap +var Buurtcode fieldMapIdx + +var BuurtcodeItems fieldItemsMap + +var BuurtnaamTracker uint16 +var BuurtnaamIdxMap fieldIdxMap +var Buurtnaam fieldMapIdx + +var WijkcodeTracker uint16 +var WijkcodeIdxMap fieldIdxMap +var Wijkcode fieldMapIdx + +var WijkcodeItems fieldItemsMap + +var WijknaamTracker uint16 +var WijknaamIdxMap fieldIdxMap +var Wijknaam fieldMapIdx + +var ProvinciecodeTracker uint16 +var ProvinciecodeIdxMap fieldIdxMap +var Provinciecode fieldMapIdx + +var ProvinciecodeItems fieldItemsMap + +var ProvincienaamTracker uint16 +var ProvincienaamIdxMap fieldIdxMap +var Provincienaam fieldMapIdx + +var PandGasEanAansluitingenTracker uint16 +var PandGasEanAansluitingenIdxMap fieldIdxMap +var PandGasEanAansluitingen fieldMapIdx + +var P6GasAansluitingen2020Tracker uint16 +var P6GasAansluitingen2020IdxMap fieldIdxMap +var P6GasAansluitingen2020 fieldMapIdx + +var P6Gasm32020Tracker uint16 +var P6Gasm32020IdxMap fieldIdxMap +var P6Gasm32020 fieldMapIdx + +var P6Kwh2020Tracker uint16 +var P6Kwh2020IdxMap fieldIdxMap +var P6Kwh2020 fieldMapIdx + +var PandBouwjaarTracker 
uint16 +var PandBouwjaarIdxMap fieldIdxMap +var PandBouwjaar fieldMapIdx + +var PandGasAansluitingenTracker uint16 +var PandGasAansluitingenIdxMap fieldIdxMap +var PandGasAansluitingen fieldMapIdx + +var GebruiksdoelenTracker uint16 +var GebruiksdoelenIdxMap fieldIdxMap +var Gebruiksdoelen fieldMapIdx + +/* +var {columnname}Tracker uint16 +var {columnname}IdxMap fieldIdxMap +var {columnname} fieldMapIdx +var {columnname}Items fieldItemmap +*/ + +// item map lock +var lock = sync.RWMutex{} + +// bitArray Lock +var balock = sync.RWMutex{} + +func init() { + + WoningTypeTracker = 0 + WoningTypeIdxMap = make(fieldIdxMap) + WoningType = make(fieldMapIdx) + + WoningTypeItems = make(fieldItemsMap) + + LabelscoreVoorlopigTracker = 0 + LabelscoreVoorlopigIdxMap = make(fieldIdxMap) + LabelscoreVoorlopig = make(fieldMapIdx) + + LabelscoreVoorlopigItems = make(fieldItemsMap) + + LabelscoreDefinitiefTracker = 0 + LabelscoreDefinitiefIdxMap = make(fieldIdxMap) + LabelscoreDefinitief = make(fieldMapIdx) + + LabelscoreDefinitiefItems = make(fieldItemsMap) + + GemeentecodeTracker = 0 + GemeentecodeIdxMap = make(fieldIdxMap) + Gemeentecode = make(fieldMapIdx) + + GemeentecodeItems = make(fieldItemsMap) + + GemeentenaamTracker = 0 + GemeentenaamIdxMap = make(fieldIdxMap) + Gemeentenaam = make(fieldMapIdx) + + BuurtcodeTracker = 0 + BuurtcodeIdxMap = make(fieldIdxMap) + Buurtcode = make(fieldMapIdx) + + BuurtcodeItems = make(fieldItemsMap) + + BuurtnaamTracker = 0 + BuurtnaamIdxMap = make(fieldIdxMap) + Buurtnaam = make(fieldMapIdx) + + WijkcodeTracker = 0 + WijkcodeIdxMap = make(fieldIdxMap) + Wijkcode = make(fieldMapIdx) + + WijkcodeItems = make(fieldItemsMap) + + WijknaamTracker = 0 + WijknaamIdxMap = make(fieldIdxMap) + Wijknaam = make(fieldMapIdx) + + ProvinciecodeTracker = 0 + ProvinciecodeIdxMap = make(fieldIdxMap) + Provinciecode = make(fieldMapIdx) + + ProvinciecodeItems = make(fieldItemsMap) + + ProvincienaamTracker = 0 + ProvincienaamIdxMap = make(fieldIdxMap) + 
Provincienaam = make(fieldMapIdx) + + PandGasEanAansluitingenTracker = 0 + PandGasEanAansluitingenIdxMap = make(fieldIdxMap) + PandGasEanAansluitingen = make(fieldMapIdx) + + P6GasAansluitingen2020Tracker = 0 + P6GasAansluitingen2020IdxMap = make(fieldIdxMap) + P6GasAansluitingen2020 = make(fieldMapIdx) + + P6Gasm32020Tracker = 0 + P6Gasm32020IdxMap = make(fieldIdxMap) + P6Gasm32020 = make(fieldMapIdx) + + P6Kwh2020Tracker = 0 + P6Kwh2020IdxMap = make(fieldIdxMap) + P6Kwh2020 = make(fieldMapIdx) + + PandBouwjaarTracker = 0 + PandBouwjaarIdxMap = make(fieldIdxMap) + PandBouwjaar = make(fieldMapIdx) + + PandGasAansluitingenTracker = 0 + PandGasAansluitingenIdxMap = make(fieldIdxMap) + PandGasAansluitingen = make(fieldMapIdx) + + GebruiksdoelenTracker = 0 + GebruiksdoelenIdxMap = make(fieldIdxMap) + Gebruiksdoelen = make(fieldMapIdx) + + /* + labelscoredefinitiefTracker = 0 + labelscoredefinitiefIdxMap = make(fieldIdxMap) + labelscoredefinitief = make(fieldMapIdx) + */ +} + +type ItemIn struct { + Pid string `json:"pid"` + Vid string `json:"vid"` + Numid string `json:"numid"` + Postcode string `json:"postcode"` + Oppervlakte string `json:"oppervlakte"` + Woningequivalent string `json:"woningequivalent"` + Adres string `json:"adres"` + WoningType string `json:"woning_type"` + LabelscoreVoorlopig string `json:"labelscore_voorlopig"` + LabelscoreDefinitief string `json:"labelscore_definitief"` + Gemeentecode string `json:"gemeentecode"` + Gemeentenaam string `json:"gemeentenaam"` + Buurtcode string `json:"buurtcode"` + Buurtnaam string `json:"buurtnaam"` + Wijkcode string `json:"wijkcode"` + Wijknaam string `json:"wijknaam"` + Provinciecode string `json:"provinciecode"` + Provincienaam string `json:"provincienaam"` + Point string `json:"point"` + PandGasEanAansluitingen string `json:"pand_gas_ean_aansluitingen"` + GroupId2020 string `json:"group_id_2020"` + P6GasAansluitingen2020 string `json:"p6_gas_aansluitingen_2020"` + P6Gasm32020 string `json:"p6_gasm3_2020"` + 
P6Kwh2020 string `json:"p6_kwh_2020"` + P6TotaalPandoppervlakM2 string `json:"p6_totaal_pandoppervlak_m2"` + PandBouwjaar string `json:"pand_bouwjaar"` + PandGasAansluitingen string `json:"pand_gas_aansluitingen"` + Gebruiksdoelen string `json:"gebruiksdoelen"` +} + +type ItemOut struct { + Pid string `json:"pid"` + Vid string `json:"vid"` + Numid string `json:"numid"` + Postcode string `json:"postcode"` + Oppervlakte string `json:"oppervlakte"` + Woningequivalent string `json:"woningequivalent"` + Adres string `json:"adres"` + WoningType string `json:"woning_type"` + LabelscoreVoorlopig string `json:"labelscore_voorlopig"` + LabelscoreDefinitief string `json:"labelscore_definitief"` + Gemeentecode string `json:"gemeentecode"` + Gemeentenaam string `json:"gemeentenaam"` + Buurtcode string `json:"buurtcode"` + Buurtnaam string `json:"buurtnaam"` + Wijkcode string `json:"wijkcode"` + Wijknaam string `json:"wijknaam"` + Provinciecode string `json:"provinciecode"` + Provincienaam string `json:"provincienaam"` + Point string `json:"point"` + PandGasEanAansluitingen string `json:"pand_gas_ean_aansluitingen"` + GroupId2020 string `json:"group_id_2020"` + P6GasAansluitingen2020 string `json:"p6_gas_aansluitingen_2020"` + P6Gasm32020 string `json:"p6_gasm3_2020"` + P6Kwh2020 string `json:"p6_kwh_2020"` + P6TotaalPandoppervlakM2 string `json:"p6_totaal_pandoppervlak_m2"` + PandBouwjaar string `json:"pand_bouwjaar"` + PandGasAansluitingen string `json:"pand_gas_aansluitingen"` + Gebruiksdoelen string `json:"gebruiksdoelen"` +} + +type Item struct { + Label int // internal index in ITEMS + Pid string + Vid string + Numid string + Postcode string + Oppervlakte string + Woningequivalent string + Adres string + WoningType uint16 + LabelscoreVoorlopig uint16 + LabelscoreDefinitief uint16 + Gemeentecode uint16 + Gemeentenaam uint16 + Buurtcode uint16 + Buurtnaam uint16 + Wijkcode uint16 + Wijknaam uint16 + Provinciecode uint16 + Provincienaam uint16 + Point string + 
PandGasEanAansluitingen uint16 + GroupId2020 string + P6GasAansluitingen2020 uint16 + P6Gasm32020 uint16 + P6Kwh2020 uint16 + P6TotaalPandoppervlakM2 string + PandBouwjaar uint16 + PandGasAansluitingen uint16 + Gebruiksdoelen []uint16 +} + +func (i Item) MarshalJSON() ([]byte, error) { + return json.Marshal(i.Serialize()) +} + +// Shrink create smaller Item using uint16 +func (i ItemIn) Shrink(label int) Item { + + lock.Lock() + defer lock.Unlock() + + //check if column value is already present + //else store new key + if _, ok := WoningTypeIdxMap[i.WoningType]; !ok { + // store WoningType in map at current index of tracker + WoningType[WoningTypeTracker] = i.WoningType + // store key - idx + WoningTypeIdxMap[i.WoningType] = WoningTypeTracker + // increase tracker + WoningTypeTracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := LabelscoreVoorlopigIdxMap[i.LabelscoreVoorlopig]; !ok { + // store LabelscoreVoorlopig in map at current index of tracker + LabelscoreVoorlopig[LabelscoreVoorlopigTracker] = i.LabelscoreVoorlopig + // store key - idx + LabelscoreVoorlopigIdxMap[i.LabelscoreVoorlopig] = LabelscoreVoorlopigTracker + // increase tracker + LabelscoreVoorlopigTracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := LabelscoreDefinitiefIdxMap[i.LabelscoreDefinitief]; !ok { + // store LabelscoreDefinitief in map at current index of tracker + LabelscoreDefinitief[LabelscoreDefinitiefTracker] = i.LabelscoreDefinitief + // store key - idx + LabelscoreDefinitiefIdxMap[i.LabelscoreDefinitief] = LabelscoreDefinitiefTracker + // increase tracker + LabelscoreDefinitiefTracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := GemeentecodeIdxMap[i.Gemeentecode]; !ok { + // store Gemeentecode in map at current index of tracker + Gemeentecode[GemeentecodeTracker] = i.Gemeentecode + // store key - idx + GemeentecodeIdxMap[i.Gemeentecode] = 
GemeentecodeTracker + // increase tracker + GemeentecodeTracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := GemeentenaamIdxMap[i.Gemeentenaam]; !ok { + // store Gemeentenaam in map at current index of tracker + Gemeentenaam[GemeentenaamTracker] = i.Gemeentenaam + // store key - idx + GemeentenaamIdxMap[i.Gemeentenaam] = GemeentenaamTracker + // increase tracker + GemeentenaamTracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := BuurtcodeIdxMap[i.Buurtcode]; !ok { + // store Buurtcode in map at current index of tracker + Buurtcode[BuurtcodeTracker] = i.Buurtcode + // store key - idx + BuurtcodeIdxMap[i.Buurtcode] = BuurtcodeTracker + // increase tracker + BuurtcodeTracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := BuurtnaamIdxMap[i.Buurtnaam]; !ok { + // store Buurtnaam in map at current index of tracker + Buurtnaam[BuurtnaamTracker] = i.Buurtnaam + // store key - idx + BuurtnaamIdxMap[i.Buurtnaam] = BuurtnaamTracker + // increase tracker + BuurtnaamTracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := WijkcodeIdxMap[i.Wijkcode]; !ok { + // store Wijkcode in map at current index of tracker + Wijkcode[WijkcodeTracker] = i.Wijkcode + // store key - idx + WijkcodeIdxMap[i.Wijkcode] = WijkcodeTracker + // increase tracker + WijkcodeTracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := WijknaamIdxMap[i.Wijknaam]; !ok { + // store Wijknaam in map at current index of tracker + Wijknaam[WijknaamTracker] = i.Wijknaam + // store key - idx + WijknaamIdxMap[i.Wijknaam] = WijknaamTracker + // increase tracker + WijknaamTracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := ProvinciecodeIdxMap[i.Provinciecode]; !ok { + // store Provinciecode in map at current index of tracker + 
Provinciecode[ProvinciecodeTracker] = i.Provinciecode + // store key - idx + ProvinciecodeIdxMap[i.Provinciecode] = ProvinciecodeTracker + // increase tracker + ProvinciecodeTracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := ProvincienaamIdxMap[i.Provincienaam]; !ok { + // store Provincienaam in map at current index of tracker + Provincienaam[ProvincienaamTracker] = i.Provincienaam + // store key - idx + ProvincienaamIdxMap[i.Provincienaam] = ProvincienaamTracker + // increase tracker + ProvincienaamTracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := PandGasEanAansluitingenIdxMap[i.PandGasEanAansluitingen]; !ok { + // store PandGasEanAansluitingen in map at current index of tracker + PandGasEanAansluitingen[PandGasEanAansluitingenTracker] = i.PandGasEanAansluitingen + // store key - idx + PandGasEanAansluitingenIdxMap[i.PandGasEanAansluitingen] = PandGasEanAansluitingenTracker + // increase tracker + PandGasEanAansluitingenTracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := P6GasAansluitingen2020IdxMap[i.P6GasAansluitingen2020]; !ok { + // store P6GasAansluitingen2020 in map at current index of tracker + P6GasAansluitingen2020[P6GasAansluitingen2020Tracker] = i.P6GasAansluitingen2020 + // store key - idx + P6GasAansluitingen2020IdxMap[i.P6GasAansluitingen2020] = P6GasAansluitingen2020Tracker + // increase tracker + P6GasAansluitingen2020Tracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := P6Gasm32020IdxMap[i.P6Gasm32020]; !ok { + // store P6Gasm32020 in map at current index of tracker + P6Gasm32020[P6Gasm32020Tracker] = i.P6Gasm32020 + // store key - idx + P6Gasm32020IdxMap[i.P6Gasm32020] = P6Gasm32020Tracker + // increase tracker + P6Gasm32020Tracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := P6Kwh2020IdxMap[i.P6Kwh2020]; 
!ok { + // store P6Kwh2020 in map at current index of tracker + P6Kwh2020[P6Kwh2020Tracker] = i.P6Kwh2020 + // store key - idx + P6Kwh2020IdxMap[i.P6Kwh2020] = P6Kwh2020Tracker + // increase tracker + P6Kwh2020Tracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := PandBouwjaarIdxMap[i.PandBouwjaar]; !ok { + // store PandBouwjaar in map at current index of tracker + PandBouwjaar[PandBouwjaarTracker] = i.PandBouwjaar + // store key - idx + PandBouwjaarIdxMap[i.PandBouwjaar] = PandBouwjaarTracker + // increase tracker + PandBouwjaarTracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := PandGasAansluitingenIdxMap[i.PandGasAansluitingen]; !ok { + // store PandGasAansluitingen in map at current index of tracker + PandGasAansluitingen[PandGasAansluitingenTracker] = i.PandGasAansluitingen + // store key - idx + PandGasAansluitingenIdxMap[i.PandGasAansluitingen] = PandGasAansluitingenTracker + // increase tracker + PandGasAansluitingenTracker += 1 + } + + //check if column value is already present + //else store new key + if _, ok := GebruiksdoelenIdxMap[i.Gebruiksdoelen]; !ok { + // store Gebruiksdoelen in map at current index of tracker + Gebruiksdoelen[GebruiksdoelenTracker] = i.Gebruiksdoelen + // store key - idx + GebruiksdoelenIdxMap[i.Gebruiksdoelen] = GebruiksdoelenTracker + // increase tracker + GebruiksdoelenTracker += 1 + } + + //check if column value is already present + //else store new key + doelen := make([]uint16, 0) + + // parsing {a, b} array values + // string should be at least 2 example "{}" == size 2 + if len(i.Gebruiksdoelen) > 2 { + + gebruiksdoelen, err := ParsePGArray(i.Gebruiksdoelen) + if err != nil { + log.Fatal(err, "error parsing array ") + } + + for _, gd := range gebruiksdoelen { + if _, ok := GebruiksdoelenIdxMap[gd]; !ok { + // store Gebruiksdoelen in map at current index of tracker + Gebruiksdoelen[GebruiksdoelenTracker] = gd + // store key - idx + 
GebruiksdoelenIdxMap[gd] = GebruiksdoelenTracker + // increase tracker + GebruiksdoelenTracker += 1 + } + } + + for _, v := range gebruiksdoelen { + doelen = append(doelen, GebruiksdoelenIdxMap[v]) + } + } + + return Item{ + + label, + + i.Pid, + i.Vid, + i.Numid, + i.Postcode, + i.Oppervlakte, + i.Woningequivalent, + i.Adres, + WoningTypeIdxMap[i.WoningType], + LabelscoreVoorlopigIdxMap[i.LabelscoreVoorlopig], + LabelscoreDefinitiefIdxMap[i.LabelscoreDefinitief], + GemeentecodeIdxMap[i.Gemeentecode], + GemeentenaamIdxMap[i.Gemeentenaam], + BuurtcodeIdxMap[i.Buurtcode], + BuurtnaamIdxMap[i.Buurtnaam], + WijkcodeIdxMap[i.Wijkcode], + WijknaamIdxMap[i.Wijknaam], + ProvinciecodeIdxMap[i.Provinciecode], + ProvincienaamIdxMap[i.Provincienaam], + i.Point, + PandGasEanAansluitingenIdxMap[i.PandGasEanAansluitingen], + i.GroupId2020, + P6GasAansluitingen2020IdxMap[i.P6GasAansluitingen2020], + P6Gasm32020IdxMap[i.P6Gasm32020], + P6Kwh2020IdxMap[i.P6Kwh2020], + i.P6TotaalPandoppervlakM2, + PandBouwjaarIdxMap[i.PandBouwjaar], + PandGasAansluitingenIdxMap[i.PandGasAansluitingen], + doelen, + } +} + +// Store selected columns in seperate map[columnvalue]bitarray +// for gast item lookup +func (i Item) StoreBitArrayColumns() { + + balock.Lock() + defer balock.Unlock() + + lock.RLock() + defer lock.RUnlock() + + var ba bitarray.BitArray + var ok bool + + // Column WoningType has byte arrays for + ba, ok = WoningTypeItems[i.WoningType] + if !ok { + ba = bitarray.NewSparseBitArray() + WoningTypeItems[i.WoningType] = ba + } + + ba.SetBit(uint64(i.Label)) + // Column LabelscoreVoorlopig has byte arrays for + ba, ok = LabelscoreVoorlopigItems[i.LabelscoreVoorlopig] + if !ok { + ba = bitarray.NewSparseBitArray() + LabelscoreVoorlopigItems[i.LabelscoreVoorlopig] = ba + } + + ba.SetBit(uint64(i.Label)) + // Column LabelscoreDefinitief has byte arrays for + ba, ok = LabelscoreDefinitiefItems[i.LabelscoreDefinitief] + if !ok { + ba = bitarray.NewSparseBitArray() + 
LabelscoreDefinitiefItems[i.LabelscoreDefinitief] = ba + } + + ba.SetBit(uint64(i.Label)) + // Column Gemeentecode has byte arrays for + ba, ok = GemeentecodeItems[i.Gemeentecode] + if !ok { + ba = bitarray.NewSparseBitArray() + GemeentecodeItems[i.Gemeentecode] = ba + } + + ba.SetBit(uint64(i.Label)) + // Column Buurtcode has byte arrays for + ba, ok = BuurtcodeItems[i.Buurtcode] + if !ok { + ba = bitarray.NewSparseBitArray() + BuurtcodeItems[i.Buurtcode] = ba + } + + ba.SetBit(uint64(i.Label)) + // Column Wijkcode has byte arrays for + ba, ok = WijkcodeItems[i.Wijkcode] + if !ok { + ba = bitarray.NewSparseBitArray() + WijkcodeItems[i.Wijkcode] = ba + } + + ba.SetBit(uint64(i.Label)) + // Column Provinciecode has byte arrays for + ba, ok = ProvinciecodeItems[i.Provinciecode] + if !ok { + ba = bitarray.NewSparseBitArray() + ProvinciecodeItems[i.Provinciecode] = ba + } + + ba.SetBit(uint64(i.Label)) + + /* + // Column Buurtcode has byte arrays for + ba, ok = BuurtcodeItems[i.Buurtcode] + if !ok { + ba = bitarray.NewSparseBitArray() + BuurtcodeItems[i.Buurtcode] = ba + } + ba.SetBit(uint64(i.Label)) + */ + +} + +func (i Item) Serialize() ItemOut { + + lock.RLock() + defer lock.RUnlock() + + return ItemOut{ + + i.Pid, + i.Vid, + i.Numid, + i.Postcode, + i.Oppervlakte, + i.Woningequivalent, + i.Adres, + WoningType[i.WoningType], + LabelscoreVoorlopig[i.LabelscoreVoorlopig], + LabelscoreDefinitief[i.LabelscoreDefinitief], + Gemeentecode[i.Gemeentecode], + Gemeentenaam[i.Gemeentenaam], + Buurtcode[i.Buurtcode], + Buurtnaam[i.Buurtnaam], + Wijkcode[i.Wijkcode], + Wijknaam[i.Wijknaam], + Provinciecode[i.Provinciecode], + Provincienaam[i.Provincienaam], + i.Point, + PandGasEanAansluitingen[i.PandGasEanAansluitingen], + i.GroupId2020, + P6GasAansluitingen2020[i.P6GasAansluitingen2020], + P6Gasm32020[i.P6Gasm32020], + P6Kwh2020[i.P6Kwh2020], + i.P6TotaalPandoppervlakM2, + PandBouwjaar[i.PandBouwjaar], + PandGasAansluitingen[i.PandGasAansluitingen], + 
GettersGebruiksdoelen(&i), + } +} + +func (i ItemIn) Columns() []string { + return []string{ + + "pid", + "vid", + "numid", + "postcode", + "oppervlakte", + "woningequivalent", + "adres", + "woning_type", + "labelscore_voorlopig", + "labelscore_definitief", + "gemeentecode", + "gemeentenaam", + "buurtcode", + "buurtnaam", + "wijkcode", + "wijknaam", + "provinciecode", + "provincienaam", + "point", + "pand_gas_ean_aansluitingen", + "group_id_2020", + "p6_gas_aansluitingen_2020", + "p6_gasm3_2020", + "p6_kwh_2020", + "p6_totaal_pandoppervlak_m2", + "pand_bouwjaar", + "pand_gas_aansluitingen", + "gebruiksdoelen", + } +} + +func (i ItemOut) Columns() []string { + return []string{ + + "pid", + "vid", + "numid", + "postcode", + "oppervlakte", + "woningequivalent", + "adres", + "woning_type", + "labelscore_voorlopig", + "labelscore_definitief", + "gemeentecode", + "gemeentenaam", + "buurtcode", + "buurtnaam", + "wijkcode", + "wijknaam", + "provinciecode", + "provincienaam", + "point", + "pand_gas_ean_aansluitingen", + "group_id_2020", + "p6_gas_aansluitingen_2020", + "p6_gasm3_2020", + "p6_kwh_2020", + "p6_totaal_pandoppervlak_m2", + "pand_bouwjaar", + "pand_gas_aansluitingen", + "gebruiksdoelen", + } +} + +func (i Item) Row() []string { + + lock.RLock() + defer lock.RUnlock() + + return []string{ + + i.Pid, + i.Vid, + i.Numid, + i.Postcode, + i.Oppervlakte, + i.Woningequivalent, + i.Adres, + WoningType[i.WoningType], + LabelscoreVoorlopig[i.LabelscoreVoorlopig], + LabelscoreDefinitief[i.LabelscoreDefinitief], + Gemeentecode[i.Gemeentecode], + Gemeentenaam[i.Gemeentenaam], + Buurtcode[i.Buurtcode], + Buurtnaam[i.Buurtnaam], + Wijkcode[i.Wijkcode], + Wijknaam[i.Wijknaam], + Provinciecode[i.Provinciecode], + Provincienaam[i.Provincienaam], + i.Point, + PandGasEanAansluitingen[i.PandGasEanAansluitingen], + i.GroupId2020, + P6GasAansluitingen2020[i.P6GasAansluitingen2020], + P6Gasm32020[i.P6Gasm32020], + P6Kwh2020[i.P6Kwh2020], + i.P6TotaalPandoppervlakM2, + 
PandBouwjaar[i.PandBouwjaar], + PandGasAansluitingen[i.PandGasAansluitingen], + GettersGebruiksdoelen(&i), + } +} + +func (i Item) GetIndex() string { + return GettersAdres(&i) +} + +func (i Item) GetGeometry() string { + return GettersPoint(&i) +} + +// contain filter Pid +func FilterPidContains(i *Item, s string) bool { + return strings.Contains(i.Pid, s) +} + +// startswith filter Pid +func FilterPidStartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.Pid, s) +} + +// match filters Pid +func FilterPidMatch(i *Item, s string) bool { + return i.Pid == s +} + +// getter Pid +func GettersPid(i *Item) string { + return i.Pid +} + +// contain filter Vid +func FilterVidContains(i *Item, s string) bool { + return strings.Contains(i.Vid, s) +} + +// startswith filter Vid +func FilterVidStartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.Vid, s) +} + +// match filters Vid +func FilterVidMatch(i *Item, s string) bool { + return i.Vid == s +} + +// getter Vid +func GettersVid(i *Item) string { + return i.Vid +} + +// contain filter Numid +func FilterNumidContains(i *Item, s string) bool { + return strings.Contains(i.Numid, s) +} + +// startswith filter Numid +func FilterNumidStartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.Numid, s) +} + +// match filters Numid +func FilterNumidMatch(i *Item, s string) bool { + return i.Numid == s +} + +// getter Numid +func GettersNumid(i *Item) string { + return i.Numid +} + +// contain filter Postcode +func FilterPostcodeContains(i *Item, s string) bool { + return strings.Contains(i.Postcode, s) +} + +// startswith filter Postcode +func FilterPostcodeStartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.Postcode, s) +} + +// match filters Postcode +func FilterPostcodeMatch(i *Item, s string) bool { + return i.Postcode == s +} + +// getter Postcode +func GettersPostcode(i *Item) string { + return i.Postcode +} + +// contain filter Oppervlakte +func FilterOppervlakteContains(i *Item, 
s string) bool { + return strings.Contains(i.Oppervlakte, s) +} + +// startswith filter Oppervlakte +func FilterOppervlakteStartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.Oppervlakte, s) +} + +// match filters Oppervlakte +func FilterOppervlakteMatch(i *Item, s string) bool { + return i.Oppervlakte == s +} + +// getter Oppervlakte +func GettersOppervlakte(i *Item) string { + return i.Oppervlakte +} + +// contain filter Woningequivalent +func FilterWoningequivalentContains(i *Item, s string) bool { + return strings.Contains(i.Woningequivalent, s) +} + +// startswith filter Woningequivalent +func FilterWoningequivalentStartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.Woningequivalent, s) +} + +// match filters Woningequivalent +func FilterWoningequivalentMatch(i *Item, s string) bool { + return i.Woningequivalent == s +} + +// getter Woningequivalent +func GettersWoningequivalent(i *Item) string { + return i.Woningequivalent +} + +// contain filter Adres +func FilterAdresContains(i *Item, s string) bool { + return strings.Contains(i.Adres, s) +} + +// startswith filter Adres +func FilterAdresStartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.Adres, s) +} + +// match filters Adres +func FilterAdresMatch(i *Item, s string) bool { + return i.Adres == s +} + +// getter Adres +func GettersAdres(i *Item) string { + return i.Adres +} + +// contain filter WoningType +func FilterWoningTypeContains(i *Item, s string) bool { + return strings.Contains(WoningType[i.WoningType], s) +} + +// startswith filter WoningType +func FilterWoningTypeStartsWith(i *Item, s string) bool { + return strings.HasPrefix(WoningType[i.WoningType], s) +} + +// match filters WoningType +func FilterWoningTypeMatch(i *Item, s string) bool { + return WoningType[i.WoningType] == s +} + +// getter WoningType +func GettersWoningType(i *Item) string { + return WoningType[i.WoningType] +} + +// contain filter LabelscoreVoorlopig +func 
FilterLabelscoreVoorlopigContains(i *Item, s string) bool { + return strings.Contains(LabelscoreVoorlopig[i.LabelscoreVoorlopig], s) +} + +// startswith filter LabelscoreVoorlopig +func FilterLabelscoreVoorlopigStartsWith(i *Item, s string) bool { + return strings.HasPrefix(LabelscoreVoorlopig[i.LabelscoreVoorlopig], s) +} + +// match filters LabelscoreVoorlopig +func FilterLabelscoreVoorlopigMatch(i *Item, s string) bool { + return LabelscoreVoorlopig[i.LabelscoreVoorlopig] == s +} + +// getter LabelscoreVoorlopig +func GettersLabelscoreVoorlopig(i *Item) string { + return LabelscoreVoorlopig[i.LabelscoreVoorlopig] +} + +// contain filter LabelscoreDefinitief +func FilterLabelscoreDefinitiefContains(i *Item, s string) bool { + return strings.Contains(LabelscoreDefinitief[i.LabelscoreDefinitief], s) +} + +// startswith filter LabelscoreDefinitief +func FilterLabelscoreDefinitiefStartsWith(i *Item, s string) bool { + return strings.HasPrefix(LabelscoreDefinitief[i.LabelscoreDefinitief], s) +} + +// match filters LabelscoreDefinitief +func FilterLabelscoreDefinitiefMatch(i *Item, s string) bool { + return LabelscoreDefinitief[i.LabelscoreDefinitief] == s +} + +// getter LabelscoreDefinitief +func GettersLabelscoreDefinitief(i *Item) string { + return LabelscoreDefinitief[i.LabelscoreDefinitief] +} + +// contain filter Gemeentecode +func FilterGemeentecodeContains(i *Item, s string) bool { + return strings.Contains(Gemeentecode[i.Gemeentecode], s) +} + +// startswith filter Gemeentecode +func FilterGemeentecodeStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Gemeentecode[i.Gemeentecode], s) +} + +// match filters Gemeentecode +func FilterGemeentecodeMatch(i *Item, s string) bool { + return Gemeentecode[i.Gemeentecode] == s +} + +// getter Gemeentecode +func GettersGemeentecode(i *Item) string { + return Gemeentecode[i.Gemeentecode] +} + +// contain filter Gemeentenaam +func FilterGemeentenaamContains(i *Item, s string) bool { + return 
strings.Contains(Gemeentenaam[i.Gemeentenaam], s) +} + +// startswith filter Gemeentenaam +func FilterGemeentenaamStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Gemeentenaam[i.Gemeentenaam], s) +} + +// match filters Gemeentenaam +func FilterGemeentenaamMatch(i *Item, s string) bool { + return Gemeentenaam[i.Gemeentenaam] == s +} + +// getter Gemeentenaam +func GettersGemeentenaam(i *Item) string { + return Gemeentenaam[i.Gemeentenaam] +} + +// contain filter Buurtcode +func FilterBuurtcodeContains(i *Item, s string) bool { + return strings.Contains(Buurtcode[i.Buurtcode], s) +} + +// startswith filter Buurtcode +func FilterBuurtcodeStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Buurtcode[i.Buurtcode], s) +} + +// match filters Buurtcode +func FilterBuurtcodeMatch(i *Item, s string) bool { + return Buurtcode[i.Buurtcode] == s +} + +// getter Buurtcode +func GettersBuurtcode(i *Item) string { + return Buurtcode[i.Buurtcode] +} + +// contain filter Buurtnaam +func FilterBuurtnaamContains(i *Item, s string) bool { + return strings.Contains(Buurtnaam[i.Buurtnaam], s) +} + +// startswith filter Buurtnaam +func FilterBuurtnaamStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Buurtnaam[i.Buurtnaam], s) +} -type Item struct { - Tconst string `json:"tconst"` - Titletype string `json:"titletype"` - Primarytitle string `json:"primarytitle"` - Originaltitle string `json:"originaltitle"` - Isadult string `json:"isadult"` - Startyear string `json:"startyear"` - Endyear string `json:"endyear"` - Runtimeminutes string `json:"runtimeminutes"` - Genres string `json:"genres"` -} - -func (i Item) Columns() []string { - return []string{ - "tconst", - "titletype", - "primarytitle", - "originaltitle", - "isadult", - "startyear", - "endyear", - "runtimeminutes", - "genres", - } +// match filters Buurtnaam +func FilterBuurtnaamMatch(i *Item, s string) bool { + return Buurtnaam[i.Buurtnaam] == s } -func (i Item) Row() []string { - return 
[]string{ - i.Tconst, - i.Titletype, - i.Primarytitle, - i.Originaltitle, - i.Isadult, - i.Startyear, - i.Endyear, - i.Runtimeminutes, - i.Genres, - } +// getter Buurtnaam +func GettersBuurtnaam(i *Item) string { + return Buurtnaam[i.Buurtnaam] } -func (i Item) GetIndex() string { - return i.Tconst +// contain filter Wijkcode +func FilterWijkcodeContains(i *Item, s string) bool { + return strings.Contains(Wijkcode[i.Wijkcode], s) } -// contain filters -func FilterTconstContains(i *Item, s string) bool { - return strings.Contains(i.Tconst, s) +// startswith filter Wijkcode +func FilterWijkcodeStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Wijkcode[i.Wijkcode], s) } -func FilterTitletypeContains(i *Item, s string) bool { - return strings.Contains(i.Titletype, s) + +// match filters Wijkcode +func FilterWijkcodeMatch(i *Item, s string) bool { + return Wijkcode[i.Wijkcode] == s } -func FilterPrimarytitleContains(i *Item, s string) bool { - return strings.Contains(i.Primarytitle, s) + +// getter Wijkcode +func GettersWijkcode(i *Item) string { + return Wijkcode[i.Wijkcode] } -func FilterOriginaltitleContains(i *Item, s string) bool { - return strings.Contains(i.Originaltitle, s) + +// contain filter Wijknaam +func FilterWijknaamContains(i *Item, s string) bool { + return strings.Contains(Wijknaam[i.Wijknaam], s) } -func FilterIsadultContains(i *Item, s string) bool { - return strings.Contains(i.Isadult, s) + +// startswith filter Wijknaam +func FilterWijknaamStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Wijknaam[i.Wijknaam], s) } -func FilterStartyearContains(i *Item, s string) bool { - return strings.Contains(i.Startyear, s) + +// match filters Wijknaam +func FilterWijknaamMatch(i *Item, s string) bool { + return Wijknaam[i.Wijknaam] == s } -func FilterEndyearContains(i *Item, s string) bool { - return strings.Contains(i.Endyear, s) + +// getter Wijknaam +func GettersWijknaam(i *Item) string { + return Wijknaam[i.Wijknaam] } -func 
FilterRuntimeminutesContains(i *Item, s string) bool { - return strings.Contains(i.Runtimeminutes, s) + +// contain filter Provinciecode +func FilterProvinciecodeContains(i *Item, s string) bool { + return strings.Contains(Provinciecode[i.Provinciecode], s) } -func FilterGenresContains(i *Item, s string) bool { - return strings.Contains(i.Genres, s) + +// startswith filter Provinciecode +func FilterProvinciecodeStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Provinciecode[i.Provinciecode], s) } -// startswith filters -func FilterTconstStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Tconst, s) +// match filters Provinciecode +func FilterProvinciecodeMatch(i *Item, s string) bool { + return Provinciecode[i.Provinciecode] == s } -func FilterTitletypeStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Titletype, s) + +// getter Provinciecode +func GettersProvinciecode(i *Item) string { + return Provinciecode[i.Provinciecode] } -func FilterPrimarytitleStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Primarytitle, s) + +// contain filter Provincienaam +func FilterProvincienaamContains(i *Item, s string) bool { + return strings.Contains(Provincienaam[i.Provincienaam], s) } -func FilterOriginaltitleStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Originaltitle, s) + +// startswith filter Provincienaam +func FilterProvincienaamStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Provincienaam[i.Provincienaam], s) } -func FilterIsadultStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Isadult, s) + +// match filters Provincienaam +func FilterProvincienaamMatch(i *Item, s string) bool { + return Provincienaam[i.Provincienaam] == s } -func FilterStartyearStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Startyear, s) + +// getter Provincienaam +func GettersProvincienaam(i *Item) string { + return Provincienaam[i.Provincienaam] } -func 
FilterEndyearStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Endyear, s) + +// contain filter Point +func FilterPointContains(i *Item, s string) bool { + return strings.Contains(i.Point, s) } -func FilterRuntimeminutesStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Runtimeminutes, s) + +// startswith filter Point +func FilterPointStartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.Point, s) } -func FilterGenresStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Genres, s) + +// match filters Point +func FilterPointMatch(i *Item, s string) bool { + return i.Point == s } -// match filters -func FilterTconstMatch(i *Item, s string) bool { - return i.Tconst == s +// getter Point +func GettersPoint(i *Item) string { + return i.Point } -func FilterTitletypeMatch(i *Item, s string) bool { - return i.Titletype == s + +// contain filter PandGasEanAansluitingen +func FilterPandGasEanAansluitingenContains(i *Item, s string) bool { + return strings.Contains(PandGasEanAansluitingen[i.PandGasEanAansluitingen], s) } -func FilterPrimarytitleMatch(i *Item, s string) bool { - return i.Primarytitle == s + +// startswith filter PandGasEanAansluitingen +func FilterPandGasEanAansluitingenStartsWith(i *Item, s string) bool { + return strings.HasPrefix(PandGasEanAansluitingen[i.PandGasEanAansluitingen], s) } -func FilterOriginaltitleMatch(i *Item, s string) bool { - return i.Originaltitle == s + +// match filters PandGasEanAansluitingen +func FilterPandGasEanAansluitingenMatch(i *Item, s string) bool { + return PandGasEanAansluitingen[i.PandGasEanAansluitingen] == s } -func FilterIsadultMatch(i *Item, s string) bool { - return i.Isadult == s + +// getter PandGasEanAansluitingen +func GettersPandGasEanAansluitingen(i *Item) string { + return PandGasEanAansluitingen[i.PandGasEanAansluitingen] } -func FilterStartyearMatch(i *Item, s string) bool { - return i.Startyear == s + +// contain filter GroupId2020 +func 
FilterGroupId2020Contains(i *Item, s string) bool { + return strings.Contains(i.GroupId2020, s) } -func FilterEndyearMatch(i *Item, s string) bool { - return i.Endyear == s + +// startswith filter GroupId2020 +func FilterGroupId2020StartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.GroupId2020, s) } -func FilterRuntimeminutesMatch(i *Item, s string) bool { - return i.Runtimeminutes == s + +// match filters GroupId2020 +func FilterGroupId2020Match(i *Item, s string) bool { + return i.GroupId2020 == s } -func FilterGenresMatch(i *Item, s string) bool { - return i.Genres == s + +// getter GroupId2020 +func GettersGroupId2020(i *Item) string { + return i.GroupId2020 } -// reduce functions +// contain filter P6GasAansluitingen2020 +func FilterP6GasAansluitingen2020Contains(i *Item, s string) bool { + return strings.Contains(P6GasAansluitingen2020[i.P6GasAansluitingen2020], s) +} -func reduceCount(items Items) map[string]string { - result := make(map[string]string) - result["count"] = strconv.Itoa(len(items)) - return result +// startswith filter P6GasAansluitingen2020 +func FilterP6GasAansluitingen2020StartsWith(i *Item, s string) bool { + return strings.HasPrefix(P6GasAansluitingen2020[i.P6GasAansluitingen2020], s) } -// getters -func GettersTconst(i *Item) string { - return i.Tconst +// match filters P6GasAansluitingen2020 +func FilterP6GasAansluitingen2020Match(i *Item, s string) bool { + return P6GasAansluitingen2020[i.P6GasAansluitingen2020] == s +} + +// getter P6GasAansluitingen2020 +func GettersP6GasAansluitingen2020(i *Item) string { + return P6GasAansluitingen2020[i.P6GasAansluitingen2020] +} + +// contain filter P6Gasm32020 +func FilterP6Gasm32020Contains(i *Item, s string) bool { + return strings.Contains(P6Gasm32020[i.P6Gasm32020], s) +} + +// startswith filter P6Gasm32020 +func FilterP6Gasm32020StartsWith(i *Item, s string) bool { + return strings.HasPrefix(P6Gasm32020[i.P6Gasm32020], s) +} + +// match filters P6Gasm32020 +func 
FilterP6Gasm32020Match(i *Item, s string) bool { + return P6Gasm32020[i.P6Gasm32020] == s +} + +// getter P6Gasm32020 +func GettersP6Gasm32020(i *Item) string { + return P6Gasm32020[i.P6Gasm32020] +} + +// contain filter P6Kwh2020 +func FilterP6Kwh2020Contains(i *Item, s string) bool { + return strings.Contains(P6Kwh2020[i.P6Kwh2020], s) +} + +// startswith filter P6Kwh2020 +func FilterP6Kwh2020StartsWith(i *Item, s string) bool { + return strings.HasPrefix(P6Kwh2020[i.P6Kwh2020], s) +} + +// match filters P6Kwh2020 +func FilterP6Kwh2020Match(i *Item, s string) bool { + return P6Kwh2020[i.P6Kwh2020] == s +} + +// getter P6Kwh2020 +func GettersP6Kwh2020(i *Item) string { + return P6Kwh2020[i.P6Kwh2020] +} + +// contain filter P6TotaalPandoppervlakM2 +func FilterP6TotaalPandoppervlakM2Contains(i *Item, s string) bool { + return strings.Contains(i.P6TotaalPandoppervlakM2, s) +} + +// startswith filter P6TotaalPandoppervlakM2 +func FilterP6TotaalPandoppervlakM2StartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.P6TotaalPandoppervlakM2, s) +} + +// match filters P6TotaalPandoppervlakM2 +func FilterP6TotaalPandoppervlakM2Match(i *Item, s string) bool { + return i.P6TotaalPandoppervlakM2 == s +} + +// getter P6TotaalPandoppervlakM2 +func GettersP6TotaalPandoppervlakM2(i *Item) string { + return i.P6TotaalPandoppervlakM2 +} + +// contain filter PandBouwjaar +func FilterPandBouwjaarContains(i *Item, s string) bool { + return strings.Contains(PandBouwjaar[i.PandBouwjaar], s) +} + +// startswith filter PandBouwjaar +func FilterPandBouwjaarStartsWith(i *Item, s string) bool { + return strings.HasPrefix(PandBouwjaar[i.PandBouwjaar], s) +} + +// match filters PandBouwjaar +func FilterPandBouwjaarMatch(i *Item, s string) bool { + return PandBouwjaar[i.PandBouwjaar] == s +} + +// getter PandBouwjaar +func GettersPandBouwjaar(i *Item) string { + return PandBouwjaar[i.PandBouwjaar] +} + +// contain filter PandGasAansluitingen +func FilterPandGasAansluitingenContains(i 
*Item, s string) bool { + return strings.Contains(PandGasAansluitingen[i.PandGasAansluitingen], s) +} + +// startswith filter PandGasAansluitingen +func FilterPandGasAansluitingenStartsWith(i *Item, s string) bool { + return strings.HasPrefix(PandGasAansluitingen[i.PandGasAansluitingen], s) +} + +// match filters PandGasAansluitingen +func FilterPandGasAansluitingenMatch(i *Item, s string) bool { + return PandGasAansluitingen[i.PandGasAansluitingen] == s +} + +// getter PandGasAansluitingen +func GettersPandGasAansluitingen(i *Item) string { + return PandGasAansluitingen[i.PandGasAansluitingen] +} + +// contain filter Gebruiksdoelen +func FilterGebruiksdoelenContains(i *Item, s string) bool { + for _, v := range i.Gebruiksdoelen { + vs := Gebruiksdoelen[v] + if strings.Contains(vs, s) { + return true + } + } + return false +} + +// startswith filter Gebruiksdoelen +func FilterGebruiksdoelenStartsWith(i *Item, s string) bool { + for _, v := range i.Gebruiksdoelen { + vs := Gebruiksdoelen[v] + if strings.HasPrefix(vs, s) { + return true + } + } + return false + } -func GettersTitletype(i *Item) string { - return i.Titletype + +// match filters Gebruiksdoelen +func FilterGebruiksdoelenMatch(i *Item, s string) bool { + for _, v := range i.Gebruiksdoelen { + vs := Gebruiksdoelen[v] + if vs == s { + return true + } + } + return false } -func GettersPrimarytitle(i *Item) string { - return i.Primarytitle + +// getter Gebruiksdoelen +func GettersGebruiksdoelen(i *Item) string { + doelen := make([]string, 0) + for _, v := range i.Gebruiksdoelen { + vs := Gebruiksdoelen[v] + doelen = append(doelen, vs) + } + return strings.Join(doelen, ", ") } -func GettersOriginaltitle(i *Item) string { - return i.Originaltitle + +// getter Gebruiksdoelen +func GroupByGettersGebruiksdoelen(item *Item, grouping ItemsGroupedBy) { + + for i := range item.Gebruiksdoelen { + groupkey := Gebruiksdoelen[item.Gebruiksdoelen[i]] + grouping[groupkey] = append(grouping[groupkey], item) + } } -func 
GettersIsadult(i *Item) string { - return i.Isadult + +/* +// contain filters +func FilterEkeyContains(i *Item, s string) bool { + return strings.Contains(i.Ekey, s) } -func GettersStartyear(i *Item) string { - return i.Startyear + + +// startswith filters +func FilterEkeyStartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.Ekey, s) } -func GettersEndyear(i *Item) string { - return i.Endyear + + +// match filters +func FilterEkeyMatch(i *Item, s string) bool { + return i.Ekey == s } -func GettersRuntimeminutes(i *Item) string { - return i.Runtimeminutes + +// getters +func GettersEkey(i *Item) string { + return i.Ekey } -func GettersGenres(i *Item) string { - return i.Genres +*/ + +// reduce functions +func reduceCount(items Items) map[string]string { + result := make(map[string]string) + result["count"] = strconv.Itoa(len(items)) + return result } type GroupedOperations struct { - Funcs registerFuncType - GroupBy registerGroupByFunc - Getters registerGettersMap - Reduce registerReduce + Funcs registerFuncType + GroupBy registerGroupByFunc + Getters registerGettersMap + Reduce registerReduce + BitArrays registerBitArray } var Operations GroupedOperations @@ -187,6 +1485,154 @@ var RegisterFuncMap registerFuncType var RegisterGroupBy registerGroupByFunc var RegisterGetters registerGettersMap var RegisterReduce registerReduce +var RegisterBitArray registerBitArray + +// ValidateRegsiters validate exposed columns do match filter names +func validateRegisters() error { + var i = ItemOut{} + var filters = []string{"match", "contains", "startswith"} + for _, c := range i.Columns() { + for _, f := range filters { + if _, ok := RegisterFuncMap[f+"-"+c]; !ok { + return errors.New(c + " is missing in RegisterMap") + } + } + } + return nil +} + +// GetBitArrayWoningType for given v string see if there is +// a bitarray created. 
+func GetBitArrayWoningType(v string) (bitarray.BitArray, error) { + + bpi, ok := WoningTypeIdxMap[v] + + if !ok { + return nil, errors.New("no bitarray filter found for column value WoningType") + } + + ba, ok := WoningTypeItems[bpi] + + if !ok { + return nil, errors.New("no bitarray filter found for column idx value WoningType") + } + + return ba, nil +} + +// GetBitArrayLabelscoreVoorlopig for given v string see if there is +// a bitarray created. +func GetBitArrayLabelscoreVoorlopig(v string) (bitarray.BitArray, error) { + + bpi, ok := LabelscoreVoorlopigIdxMap[v] + + if !ok { + return nil, errors.New("no bitarray filter found for column value LabelscoreVoorlopig") + } + + ba, ok := LabelscoreVoorlopigItems[bpi] + + if !ok { + return nil, errors.New("no bitarray filter found for column idx value LabelscoreVoorlopig") + } + + return ba, nil +} + +// GetBitArrayLabelscoreDefinitief for given v string see if there is +// a bitarray created. +func GetBitArrayLabelscoreDefinitief(v string) (bitarray.BitArray, error) { + + bpi, ok := LabelscoreDefinitiefIdxMap[v] + + if !ok { + return nil, errors.New("no bitarray filter found for column value LabelscoreDefinitief") + } + + ba, ok := LabelscoreDefinitiefItems[bpi] + + if !ok { + return nil, errors.New("no bitarray filter found for column idx value LabelscoreDefinitief") + } + + return ba, nil +} + +// GetBitArrayGemeentecode for given v string see if there is +// a bitarray created. +func GetBitArrayGemeentecode(v string) (bitarray.BitArray, error) { + + bpi, ok := GemeentecodeIdxMap[v] + + if !ok { + return nil, errors.New("no bitarray filter found for column value Gemeentecode") + } + + ba, ok := GemeentecodeItems[bpi] + + if !ok { + return nil, errors.New("no bitarray filter found for column idx value Gemeentecode") + } + + return ba, nil +} + +// GetBitArrayBuurtcode for given v string see if there is +// a bitarray created. 
+func GetBitArrayBuurtcode(v string) (bitarray.BitArray, error) { + + bpi, ok := BuurtcodeIdxMap[v] + + if !ok { + return nil, errors.New("no bitarray filter found for column value Buurtcode") + } + + ba, ok := BuurtcodeItems[bpi] + + if !ok { + return nil, errors.New("no bitarray filter found for column idx value Buurtcode") + } + + return ba, nil +} + +// GetBitArrayWijkcode for given v string see if there is +// a bitarray created. +func GetBitArrayWijkcode(v string) (bitarray.BitArray, error) { + + bpi, ok := WijkcodeIdxMap[v] + + if !ok { + return nil, errors.New("no bitarray filter found for column value Wijkcode") + } + + ba, ok := WijkcodeItems[bpi] + + if !ok { + return nil, errors.New("no bitarray filter found for column idx value Wijkcode") + } + + return ba, nil +} + +// GetBitArrayProvinciecode for given v string see if there is +// a bitarray created. +func GetBitArrayProvinciecode(v string) (bitarray.BitArray, error) { + + bpi, ok := ProvinciecodeIdxMap[v] + + if !ok { + return nil, errors.New("no bitarray filter found for column value Provinciecode") + } + + ba, ok := ProvinciecodeItems[bpi] + + if !ok { + return nil, errors.New("no bitarray filter found for column idx value Provinciecode") + } + + return ba, nil +} func init() { @@ -194,98 +1640,387 @@ func init() { RegisterGroupBy = make(registerGroupByFunc) RegisterGetters = make(registerGettersMap) RegisterReduce = make(registerReduce) + RegisterBitArray = make(registerBitArray) + + // register search filter. 
+ //RegisterFuncMap["search"] = 'EDITYOURSELF' + // example RegisterFuncMap["search"] = FilterEkeyStartsWith + + //RegisterFuncMap["value"] = 'EDITYOURSELF' + RegisterGetters["value"] = GettersAdres + + // register filters + + //register filters for Pid + RegisterFuncMap["match-pid"] = FilterPidMatch + RegisterFuncMap["contains-pid"] = FilterPidContains + RegisterFuncMap["startswith-pid"] = FilterPidStartsWith + RegisterGetters["pid"] = GettersPid + RegisterGroupBy["pid"] = GettersPid + + //register filters for Vid + RegisterFuncMap["match-vid"] = FilterVidMatch + RegisterFuncMap["contains-vid"] = FilterVidContains + RegisterFuncMap["startswith-vid"] = FilterVidStartsWith + RegisterGetters["vid"] = GettersVid + RegisterGroupBy["vid"] = GettersVid + + //register filters for Numid + RegisterFuncMap["match-numid"] = FilterNumidMatch + RegisterFuncMap["contains-numid"] = FilterNumidContains + RegisterFuncMap["startswith-numid"] = FilterNumidStartsWith + RegisterGetters["numid"] = GettersNumid + RegisterGroupBy["numid"] = GettersNumid + + //register filters for Postcode + RegisterFuncMap["match-postcode"] = FilterPostcodeMatch + RegisterFuncMap["contains-postcode"] = FilterPostcodeContains + RegisterFuncMap["startswith-postcode"] = FilterPostcodeStartsWith + RegisterGetters["postcode"] = GettersPostcode + RegisterGroupBy["postcode"] = GettersPostcode - // register match filters - - RegisterFuncMap["match-tconst"] = FilterTconstMatch - RegisterFuncMap["match-titletype"] = FilterTitletypeMatch - RegisterFuncMap["match-primarytitle"] = FilterPrimarytitleMatch - RegisterFuncMap["match-originaltitle"] = FilterOriginaltitleMatch - RegisterFuncMap["match-isadult"] = FilterIsadultMatch - RegisterFuncMap["match-startyear"] = FilterStartyearMatch - RegisterFuncMap["match-endyear"] = FilterEndyearMatch - RegisterFuncMap["match-runtimeminutes"] = FilterRuntimeminutesMatch - RegisterFuncMap["match-genres"] = FilterGenresMatch - - // register contains filters - 
RegisterFuncMap["contains-tconst"] = FilterTconstContains - RegisterFuncMap["contains-titletype"] = FilterTitletypeContains - RegisterFuncMap["contains-primarytitle"] = FilterPrimarytitleContains - RegisterFuncMap["contains-originaltitle"] = FilterOriginaltitleContains - RegisterFuncMap["contains-isadult"] = FilterIsadultContains - RegisterFuncMap["contains-startyear"] = FilterStartyearContains - RegisterFuncMap["contains-endyear"] = FilterEndyearContains - RegisterFuncMap["contains-runtimeminutes"] = FilterRuntimeminutesContains - RegisterFuncMap["contains-genres"] = FilterGenresContains - - // register startswith filters - RegisterFuncMap["startswith-tconst"] = FilterTconstStartsWith - RegisterFuncMap["startswith-titletype"] = FilterTitletypeStartsWith - RegisterFuncMap["startswith-primarytitle"] = FilterPrimarytitleStartsWith - RegisterFuncMap["startswith-originaltitle"] = FilterOriginaltitleStartsWith - RegisterFuncMap["startswith-isadult"] = FilterIsadultStartsWith - RegisterFuncMap["startswith-startyear"] = FilterStartyearStartsWith - RegisterFuncMap["startswith-endyear"] = FilterEndyearStartsWith - RegisterFuncMap["startswith-runtimeminutes"] = FilterRuntimeminutesStartsWith - RegisterFuncMap["startswith-genres"] = FilterGenresStartsWith - - // register getters - RegisterGetters["tconst"] = GettersTconst - RegisterGetters["titletype"] = GettersTitletype - RegisterGetters["primarytitle"] = GettersPrimarytitle - RegisterGetters["originaltitle"] = GettersOriginaltitle - RegisterGetters["isadult"] = GettersIsadult - RegisterGetters["startyear"] = GettersStartyear - RegisterGetters["endyear"] = GettersEndyear - RegisterGetters["runtimeminutes"] = GettersRuntimeminutes - RegisterGetters["genres"] = GettersGenres - - // register groupby - RegisterGroupBy["tconst"] = GettersTconst - RegisterGroupBy["titletype"] = GettersTitletype - RegisterGroupBy["primarytitle"] = GettersPrimarytitle - RegisterGroupBy["originaltitle"] = GettersOriginaltitle - 
RegisterGroupBy["isadult"] = GettersIsadult - RegisterGroupBy["startyear"] = GettersStartyear - RegisterGroupBy["endyear"] = GettersEndyear - RegisterGroupBy["runtimeminutes"] = GettersRuntimeminutes - RegisterGroupBy["genres"] = GettersGenres + //register filters for Oppervlakte + RegisterFuncMap["match-oppervlakte"] = FilterOppervlakteMatch + RegisterFuncMap["contains-oppervlakte"] = FilterOppervlakteContains + RegisterFuncMap["startswith-oppervlakte"] = FilterOppervlakteStartsWith + RegisterGetters["oppervlakte"] = GettersOppervlakte + RegisterGroupBy["oppervlakte"] = GettersOppervlakte + + //register filters for Woningequivalent + RegisterFuncMap["match-woningequivalent"] = FilterWoningequivalentMatch + RegisterFuncMap["contains-woningequivalent"] = FilterWoningequivalentContains + RegisterFuncMap["startswith-woningequivalent"] = FilterWoningequivalentStartsWith + RegisterGetters["woningequivalent"] = GettersWoningequivalent + RegisterGroupBy["woningequivalent"] = GettersWoningequivalent + + //register filters for Adres + RegisterFuncMap["match-adres"] = FilterAdresMatch + RegisterFuncMap["contains-adres"] = FilterAdresContains + RegisterFuncMap["startswith-adres"] = FilterAdresStartsWith + RegisterGetters["adres"] = GettersAdres + RegisterGroupBy["adres"] = GettersAdres + + //register filters for WoningType + RegisterFuncMap["match-woning_type"] = FilterWoningTypeMatch + RegisterFuncMap["contains-woning_type"] = FilterWoningTypeContains + RegisterFuncMap["startswith-woning_type"] = FilterWoningTypeStartsWith + RegisterGetters["woning_type"] = GettersWoningType + RegisterGroupBy["woning_type"] = GettersWoningType + + RegisterBitArray["woning_type"] = GetBitArrayWoningType + + //register filters for LabelscoreVoorlopig + RegisterFuncMap["match-labelscore_voorlopig"] = FilterLabelscoreVoorlopigMatch + RegisterFuncMap["contains-labelscore_voorlopig"] = FilterLabelscoreVoorlopigContains + RegisterFuncMap["startswith-labelscore_voorlopig"] = 
FilterLabelscoreVoorlopigStartsWith + RegisterGetters["labelscore_voorlopig"] = GettersLabelscoreVoorlopig + RegisterGroupBy["labelscore_voorlopig"] = GettersLabelscoreVoorlopig + + RegisterBitArray["labelscore_voorlopig"] = GetBitArrayLabelscoreVoorlopig + + //register filters for LabelscoreDefinitief + RegisterFuncMap["match-labelscore_definitief"] = FilterLabelscoreDefinitiefMatch + RegisterFuncMap["contains-labelscore_definitief"] = FilterLabelscoreDefinitiefContains + RegisterFuncMap["startswith-labelscore_definitief"] = FilterLabelscoreDefinitiefStartsWith + RegisterGetters["labelscore_definitief"] = GettersLabelscoreDefinitief + RegisterGroupBy["labelscore_definitief"] = GettersLabelscoreDefinitief + + RegisterBitArray["labelscore_definitief"] = GetBitArrayLabelscoreDefinitief + + //register filters for Gemeentecode + RegisterFuncMap["match-gemeentecode"] = FilterGemeentecodeMatch + RegisterFuncMap["contains-gemeentecode"] = FilterGemeentecodeContains + RegisterFuncMap["startswith-gemeentecode"] = FilterGemeentecodeStartsWith + RegisterGetters["gemeentecode"] = GettersGemeentecode + RegisterGroupBy["gemeentecode"] = GettersGemeentecode + + RegisterBitArray["gemeentecode"] = GetBitArrayGemeentecode + + //register filters for Gemeentenaam + RegisterFuncMap["match-gemeentenaam"] = FilterGemeentenaamMatch + RegisterFuncMap["contains-gemeentenaam"] = FilterGemeentenaamContains + RegisterFuncMap["startswith-gemeentenaam"] = FilterGemeentenaamStartsWith + RegisterGetters["gemeentenaam"] = GettersGemeentenaam + RegisterGroupBy["gemeentenaam"] = GettersGemeentenaam + + //register filters for Buurtcode + RegisterFuncMap["match-buurtcode"] = FilterBuurtcodeMatch + RegisterFuncMap["contains-buurtcode"] = FilterBuurtcodeContains + RegisterFuncMap["startswith-buurtcode"] = FilterBuurtcodeStartsWith + RegisterGetters["buurtcode"] = GettersBuurtcode + RegisterGroupBy["buurtcode"] = GettersBuurtcode + + RegisterBitArray["buurtcode"] = GetBitArrayBuurtcode + + //register 
filters for Buurtnaam + RegisterFuncMap["match-buurtnaam"] = FilterBuurtnaamMatch + RegisterFuncMap["contains-buurtnaam"] = FilterBuurtnaamContains + RegisterFuncMap["startswith-buurtnaam"] = FilterBuurtnaamStartsWith + RegisterGetters["buurtnaam"] = GettersBuurtnaam + RegisterGroupBy["buurtnaam"] = GettersBuurtnaam + + //register filters for Wijkcode + RegisterFuncMap["match-wijkcode"] = FilterWijkcodeMatch + RegisterFuncMap["contains-wijkcode"] = FilterWijkcodeContains + RegisterFuncMap["startswith-wijkcode"] = FilterWijkcodeStartsWith + RegisterGetters["wijkcode"] = GettersWijkcode + RegisterGroupBy["wijkcode"] = GettersWijkcode + + RegisterBitArray["wijkcode"] = GetBitArrayWijkcode + + //register filters for Wijknaam + RegisterFuncMap["match-wijknaam"] = FilterWijknaamMatch + RegisterFuncMap["contains-wijknaam"] = FilterWijknaamContains + RegisterFuncMap["startswith-wijknaam"] = FilterWijknaamStartsWith + RegisterGetters["wijknaam"] = GettersWijknaam + RegisterGroupBy["wijknaam"] = GettersWijknaam + + //register filters for Provinciecode + RegisterFuncMap["match-provinciecode"] = FilterProvinciecodeMatch + RegisterFuncMap["contains-provinciecode"] = FilterProvinciecodeContains + RegisterFuncMap["startswith-provinciecode"] = FilterProvinciecodeStartsWith + RegisterGetters["provinciecode"] = GettersProvinciecode + RegisterGroupBy["provinciecode"] = GettersProvinciecode + + RegisterBitArray["provinciecode"] = GetBitArrayProvinciecode + + //register filters for Provincienaam + RegisterFuncMap["match-provincienaam"] = FilterProvincienaamMatch + RegisterFuncMap["contains-provincienaam"] = FilterProvincienaamContains + RegisterFuncMap["startswith-provincienaam"] = FilterProvincienaamStartsWith + RegisterGetters["provincienaam"] = GettersProvincienaam + RegisterGroupBy["provincienaam"] = GettersProvincienaam + + //register filters for Point + RegisterFuncMap["match-point"] = FilterPointMatch + RegisterFuncMap["contains-point"] = FilterPointContains + 
RegisterFuncMap["startswith-point"] = FilterPointStartsWith + RegisterGetters["point"] = GettersPoint + RegisterGroupBy["point"] = GettersPoint + + //register filters for PandGasEanAansluitingen + RegisterFuncMap["match-pand_gas_ean_aansluitingen"] = FilterPandGasEanAansluitingenMatch + RegisterFuncMap["contains-pand_gas_ean_aansluitingen"] = FilterPandGasEanAansluitingenContains + RegisterFuncMap["startswith-pand_gas_ean_aansluitingen"] = FilterPandGasEanAansluitingenStartsWith + RegisterGetters["pand_gas_ean_aansluitingen"] = GettersPandGasEanAansluitingen + RegisterGroupBy["pand_gas_ean_aansluitingen"] = GettersPandGasEanAansluitingen + + //register filters for GroupId2020 + RegisterFuncMap["match-group_id_2020"] = FilterGroupId2020Match + RegisterFuncMap["contains-group_id_2020"] = FilterGroupId2020Contains + RegisterFuncMap["startswith-group_id_2020"] = FilterGroupId2020StartsWith + RegisterGetters["group_id_2020"] = GettersGroupId2020 + RegisterGroupBy["group_id_2020"] = GettersGroupId2020 + + //register filters for P6GasAansluitingen2020 + RegisterFuncMap["match-p6_gas_aansluitingen_2020"] = FilterP6GasAansluitingen2020Match + RegisterFuncMap["contains-p6_gas_aansluitingen_2020"] = FilterP6GasAansluitingen2020Contains + RegisterFuncMap["startswith-p6_gas_aansluitingen_2020"] = FilterP6GasAansluitingen2020StartsWith + RegisterGetters["p6_gas_aansluitingen_2020"] = GettersP6GasAansluitingen2020 + RegisterGroupBy["p6_gas_aansluitingen_2020"] = GettersP6GasAansluitingen2020 + + //register filters for P6Gasm32020 + RegisterFuncMap["match-p6_gasm3_2020"] = FilterP6Gasm32020Match + RegisterFuncMap["contains-p6_gasm3_2020"] = FilterP6Gasm32020Contains + RegisterFuncMap["startswith-p6_gasm3_2020"] = FilterP6Gasm32020StartsWith + RegisterGetters["p6_gasm3_2020"] = GettersP6Gasm32020 + RegisterGroupBy["p6_gasm3_2020"] = GettersP6Gasm32020 + + //register filters for P6Kwh2020 + RegisterFuncMap["match-p6_kwh_2020"] = FilterP6Kwh2020Match + 
RegisterFuncMap["contains-p6_kwh_2020"] = FilterP6Kwh2020Contains + RegisterFuncMap["startswith-p6_kwh_2020"] = FilterP6Kwh2020StartsWith + RegisterGetters["p6_kwh_2020"] = GettersP6Kwh2020 + RegisterGroupBy["p6_kwh_2020"] = GettersP6Kwh2020 + + //register filters for P6TotaalPandoppervlakM2 + RegisterFuncMap["match-p6_totaal_pandoppervlak_m2"] = FilterP6TotaalPandoppervlakM2Match + RegisterFuncMap["contains-p6_totaal_pandoppervlak_m2"] = FilterP6TotaalPandoppervlakM2Contains + RegisterFuncMap["startswith-p6_totaal_pandoppervlak_m2"] = FilterP6TotaalPandoppervlakM2StartsWith + RegisterGetters["p6_totaal_pandoppervlak_m2"] = GettersP6TotaalPandoppervlakM2 + RegisterGroupBy["p6_totaal_pandoppervlak_m2"] = GettersP6TotaalPandoppervlakM2 + + //register filters for PandBouwjaar + RegisterFuncMap["match-pand_bouwjaar"] = FilterPandBouwjaarMatch + RegisterFuncMap["contains-pand_bouwjaar"] = FilterPandBouwjaarContains + RegisterFuncMap["startswith-pand_bouwjaar"] = FilterPandBouwjaarStartsWith + RegisterGetters["pand_bouwjaar"] = GettersPandBouwjaar + RegisterGroupBy["pand_bouwjaar"] = GettersPandBouwjaar + + //register filters for PandGasAansluitingen + RegisterFuncMap["match-pand_gas_aansluitingen"] = FilterPandGasAansluitingenMatch + RegisterFuncMap["contains-pand_gas_aansluitingen"] = FilterPandGasAansluitingenContains + RegisterFuncMap["startswith-pand_gas_aansluitingen"] = FilterPandGasAansluitingenStartsWith + RegisterGetters["pand_gas_aansluitingen"] = GettersPandGasAansluitingen + RegisterGroupBy["pand_gas_aansluitingen"] = GettersPandGasAansluitingen + + //register filters for Gebruiksdoelen + RegisterFuncMap["match-gebruiksdoelen"] = FilterGebruiksdoelenMatch + RegisterFuncMap["contains-gebruiksdoelen"] = FilterGebruiksdoelenContains + RegisterFuncMap["startswith-gebruiksdoelen"] = FilterGebruiksdoelenStartsWith + RegisterGetters["gebruiksdoelen"] = GettersGebruiksdoelen + RegisterGroupBy["gebruiksdoelen"] = GettersGebruiksdoelen + + validateRegisters() + /* + 
RegisterFuncMap["match-ekey"] = FilterEkeyMatch + RegisterFuncMap["contains-ekey"] = FilterEkeyContains + // register startswith filters + RegisterFuncMap["startswith-ekey"] = FilterEkeyStartsWith + // register getters + RegisterGetters["ekey"] = GettersEkey + // register groupby + RegisterGroupBy["ekey"] = GettersEkey + + */ // register reduce functions RegisterReduce["count"] = reduceCount + RegisterReduce["woningequivalent"] = reduceWEQ } -func sortBy(items Items, sortingL []string) (Items, []string) { - sortFuncs := map[string]func(int, int) bool{"tconst": func(i, j int) bool { return items[i].Tconst < items[j].Tconst }, - "-tconst": func(i, j int) bool { return items[i].Tconst > items[j].Tconst }, - "titletype": func(i, j int) bool { return items[i].Titletype < items[j].Titletype }, - "-titletype": func(i, j int) bool { return items[i].Titletype > items[j].Titletype }, +type sortLookup map[string]func(int, int) bool + +func createSort(items Items) sortLookup { + + sortFuncs := sortLookup{ + + "pid": func(i, j int) bool { return items[i].Pid < items[j].Pid }, + "-pid": func(i, j int) bool { return items[i].Pid > items[j].Pid }, + + "vid": func(i, j int) bool { return items[i].Vid < items[j].Vid }, + "-vid": func(i, j int) bool { return items[i].Vid > items[j].Vid }, + + "numid": func(i, j int) bool { return items[i].Numid < items[j].Numid }, + "-numid": func(i, j int) bool { return items[i].Numid > items[j].Numid }, + + "postcode": func(i, j int) bool { return items[i].Postcode < items[j].Postcode }, + "-postcode": func(i, j int) bool { return items[i].Postcode > items[j].Postcode }, + + "oppervlakte": func(i, j int) bool { return items[i].Oppervlakte < items[j].Oppervlakte }, + "-oppervlakte": func(i, j int) bool { return items[i].Oppervlakte > items[j].Oppervlakte }, + + "woningequivalent": func(i, j int) bool { return items[i].Woningequivalent < items[j].Woningequivalent }, + "-woningequivalent": func(i, j int) bool { return items[i].Woningequivalent > 
items[j].Woningequivalent }, + + "adres": func(i, j int) bool { return items[i].Adres < items[j].Adres }, + "-adres": func(i, j int) bool { return items[i].Adres > items[j].Adres }, + + "woning_type": func(i, j int) bool { return WoningType[items[i].WoningType] < WoningType[items[j].WoningType] }, + "-woning_type": func(i, j int) bool { return WoningType[items[i].WoningType] > WoningType[items[j].WoningType] }, + + "labelscore_voorlopig": func(i, j int) bool { + return LabelscoreVoorlopig[items[i].LabelscoreVoorlopig] < LabelscoreVoorlopig[items[j].LabelscoreVoorlopig] + }, + "-labelscore_voorlopig": func(i, j int) bool { + return LabelscoreVoorlopig[items[i].LabelscoreVoorlopig] > LabelscoreVoorlopig[items[j].LabelscoreVoorlopig] + }, + + "labelscore_definitief": func(i, j int) bool { + return LabelscoreDefinitief[items[i].LabelscoreDefinitief] < LabelscoreDefinitief[items[j].LabelscoreDefinitief] + }, + "-labelscore_definitief": func(i, j int) bool { + return LabelscoreDefinitief[items[i].LabelscoreDefinitief] > LabelscoreDefinitief[items[j].LabelscoreDefinitief] + }, + + "gemeentecode": func(i, j int) bool { return Gemeentecode[items[i].Gemeentecode] < Gemeentecode[items[j].Gemeentecode] }, + "-gemeentecode": func(i, j int) bool { return Gemeentecode[items[i].Gemeentecode] > Gemeentecode[items[j].Gemeentecode] }, + + "gemeentenaam": func(i, j int) bool { return Gemeentenaam[items[i].Gemeentenaam] < Gemeentenaam[items[j].Gemeentenaam] }, + "-gemeentenaam": func(i, j int) bool { return Gemeentenaam[items[i].Gemeentenaam] > Gemeentenaam[items[j].Gemeentenaam] }, + + "buurtcode": func(i, j int) bool { return Buurtcode[items[i].Buurtcode] < Buurtcode[items[j].Buurtcode] }, + "-buurtcode": func(i, j int) bool { return Buurtcode[items[i].Buurtcode] > Buurtcode[items[j].Buurtcode] }, - "primarytitle": func(i, j int) bool { return items[i].Primarytitle < items[j].Primarytitle }, - "-primarytitle": func(i, j int) bool { return items[i].Primarytitle > items[j].Primarytitle 
}, + "buurtnaam": func(i, j int) bool { return Buurtnaam[items[i].Buurtnaam] < Buurtnaam[items[j].Buurtnaam] }, + "-buurtnaam": func(i, j int) bool { return Buurtnaam[items[i].Buurtnaam] > Buurtnaam[items[j].Buurtnaam] }, - "originaltitle": func(i, j int) bool { return items[i].Originaltitle < items[j].Originaltitle }, - "-originaltitle": func(i, j int) bool { return items[i].Originaltitle > items[j].Originaltitle }, + "wijkcode": func(i, j int) bool { return Wijkcode[items[i].Wijkcode] < Wijkcode[items[j].Wijkcode] }, + "-wijkcode": func(i, j int) bool { return Wijkcode[items[i].Wijkcode] > Wijkcode[items[j].Wijkcode] }, - "isadult": func(i, j int) bool { return items[i].Isadult < items[j].Isadult }, - "-isadult": func(i, j int) bool { return items[i].Isadult > items[j].Isadult }, + "wijknaam": func(i, j int) bool { return Wijknaam[items[i].Wijknaam] < Wijknaam[items[j].Wijknaam] }, + "-wijknaam": func(i, j int) bool { return Wijknaam[items[i].Wijknaam] > Wijknaam[items[j].Wijknaam] }, - "startyear": func(i, j int) bool { return items[i].Startyear < items[j].Startyear }, - "-startyear": func(i, j int) bool { return items[i].Startyear > items[j].Startyear }, + "provinciecode": func(i, j int) bool { + return Provinciecode[items[i].Provinciecode] < Provinciecode[items[j].Provinciecode] + }, + "-provinciecode": func(i, j int) bool { + return Provinciecode[items[i].Provinciecode] > Provinciecode[items[j].Provinciecode] + }, - "endyear": func(i, j int) bool { return items[i].Endyear < items[j].Endyear }, - "-endyear": func(i, j int) bool { return items[i].Endyear > items[j].Endyear }, + "provincienaam": func(i, j int) bool { + return Provincienaam[items[i].Provincienaam] < Provincienaam[items[j].Provincienaam] + }, + "-provincienaam": func(i, j int) bool { + return Provincienaam[items[i].Provincienaam] > Provincienaam[items[j].Provincienaam] + }, - "runtimeminutes": func(i, j int) bool { return items[i].Runtimeminutes < items[j].Runtimeminutes }, - "-runtimeminutes": 
func(i, j int) bool { return items[i].Runtimeminutes > items[j].Runtimeminutes }, + "point": func(i, j int) bool { return items[i].Point < items[j].Point }, + "-point": func(i, j int) bool { return items[i].Point > items[j].Point }, - "genres": func(i, j int) bool { return items[i].Genres < items[j].Genres }, - "-genres": func(i, j int) bool { return items[i].Genres > items[j].Genres }, + "pand_gas_ean_aansluitingen": func(i, j int) bool { + return PandGasEanAansluitingen[items[i].PandGasEanAansluitingen] < PandGasEanAansluitingen[items[j].PandGasEanAansluitingen] + }, + "-pand_gas_ean_aansluitingen": func(i, j int) bool { + return PandGasEanAansluitingen[items[i].PandGasEanAansluitingen] > PandGasEanAansluitingen[items[j].PandGasEanAansluitingen] + }, + + "group_id_2020": func(i, j int) bool { return items[i].GroupId2020 < items[j].GroupId2020 }, + "-group_id_2020": func(i, j int) bool { return items[i].GroupId2020 > items[j].GroupId2020 }, + + "p6_gas_aansluitingen_2020": func(i, j int) bool { + return P6GasAansluitingen2020[items[i].P6GasAansluitingen2020] < P6GasAansluitingen2020[items[j].P6GasAansluitingen2020] + }, + "-p6_gas_aansluitingen_2020": func(i, j int) bool { + return P6GasAansluitingen2020[items[i].P6GasAansluitingen2020] > P6GasAansluitingen2020[items[j].P6GasAansluitingen2020] + }, + + "p6_gasm3_2020": func(i, j int) bool { return P6Gasm32020[items[i].P6Gasm32020] < P6Gasm32020[items[j].P6Gasm32020] }, + "-p6_gasm3_2020": func(i, j int) bool { return P6Gasm32020[items[i].P6Gasm32020] > P6Gasm32020[items[j].P6Gasm32020] }, + + "p6_kwh_2020": func(i, j int) bool { return P6Kwh2020[items[i].P6Kwh2020] < P6Kwh2020[items[j].P6Kwh2020] }, + "-p6_kwh_2020": func(i, j int) bool { return P6Kwh2020[items[i].P6Kwh2020] > P6Kwh2020[items[j].P6Kwh2020] }, + + "p6_totaal_pandoppervlak_m2": func(i, j int) bool { return items[i].P6TotaalPandoppervlakM2 < items[j].P6TotaalPandoppervlakM2 }, + "-p6_totaal_pandoppervlak_m2": func(i, j int) bool { return 
items[i].P6TotaalPandoppervlakM2 > items[j].P6TotaalPandoppervlakM2 }, + + "pand_bouwjaar": func(i, j int) bool { return PandBouwjaar[items[i].PandBouwjaar] < PandBouwjaar[items[j].PandBouwjaar] }, + "-pand_bouwjaar": func(i, j int) bool { return PandBouwjaar[items[i].PandBouwjaar] > PandBouwjaar[items[j].PandBouwjaar] }, + + "pand_gas_aansluitingen": func(i, j int) bool { + return PandGasAansluitingen[items[i].PandGasAansluitingen] < PandGasAansluitingen[items[j].PandGasAansluitingen] + }, + "-pand_gas_aansluitingen": func(i, j int) bool { + return PandGasAansluitingen[items[i].PandGasAansluitingen] > PandGasAansluitingen[items[j].PandGasAansluitingen] + }, + + "gebruiksdoelen": func(i, j int) bool { + return GettersGebruiksdoelen(items[i]) < GettersGebruiksdoelen(items[j]) + }, + "-gebruiksdoelen": func(i, j int) bool { + return GettersGebruiksdoelen(items[i]) > GettersGebruiksdoelen(items[j]) + }, + + /* + "ekey": func(i, j int) bool { return items[i].Ekey < items[j].Ekey }, + "-ekey": func(i, j int) bool { return items[i].Ekey > items[j].Ekey }, + */ } + return sortFuncs +} + +func sortBy(items Items, sortingL []string) (Items, []string) { + + lock.Lock() + defer lock.Unlock() + + sortFuncs := createSort(items) + for _, sortFuncName := range sortingL { sortFunc := sortFuncs[sortFuncName] sort.Slice(items, sortFunc) } + // TODO must be nicer way keys := []string{} for key := range sortFuncs { diff --git a/operations.go b/operations.go index 423d1cb..0a72a17 100644 --- a/operations.go +++ b/operations.go @@ -4,16 +4,48 @@ import ( "encoding/json" "fmt" "net/http" - "net/url" + + // "reflect" + "errors" + "log" + "sort" "strconv" "strings" "time" + + "github.com/Workiva/go-datastructures/bitarray" + "github.com/go-spatial/geom" + "github.com/go-spatial/geom/encoding/geojson" ) +type filterFuncc func(*Item, string) bool +type registerFuncType map[string]filterFuncc + +type bitsetFuncc func(string) bitarray.BitArray +type registerBitSetType map[string]bitsetFuncc + 
+type filterType map[string][]string + +func (ft filterType) CacheKey() string { + filterlist := []string{} + for k, v := range ft { + filterlist = append(filterlist, fmt.Sprintf("%s=%s", k, v)) + } + sort.Strings(filterlist) + return strings.Join(filterlist, "-") +} + +type formatRespFunc func(w http.ResponseWriter, r *http.Request, items Items) +type registerFormatMap map[string]formatRespFunc + type Query struct { - Filters filterType - Excludes filterType - Anys filterType + Filters filterType + Excludes filterType + Anys filterType + BitArrays filterType + + GroupBy string + Reduce string Limit int LimitGiven bool @@ -28,6 +60,9 @@ type Query struct { IndexQuery string IndexGiven bool + Geometry geom.Geometry + GeometryGiven bool + ReturnFormat string } @@ -35,65 +70,113 @@ func (q Query) EarlyExit() bool { return q.LimitGiven && !q.PageGiven && !q.SortByGiven } -func decodeUrl(s string) string { - newS, err := url.QueryUnescape(s) - if err != nil { - fmt.Println("oh no error", err) - return s +// return cachable key for query +func (q Query) CacheKey() (string, error) { + + if SETTINGS.Get("groupbycache") != "yes" { + return "", errors.New("cache disabled") } - return newS + + if q.EarlyExit() { + return "", errors.New("not cached") + } + if q.GeometryGiven { + return "", errors.New("geo not cached") + } + + for k := range RegisterBitArray { + _, filterFound := q.Filters[k] + if filterFound { + return "", errors.New("bitarrays filters do not need to be cached") + } + } + + keys := []string{ + q.Filters.CacheKey(), + q.Excludes.CacheKey(), + q.Anys.CacheKey(), + q.GroupBy, + q.Reduce, + q.ReturnFormat, + } + + return strings.Join(keys, "-"), nil } -// util for api -func parseURLParameters(r *http.Request) Query { +// parseURLParameters checks parameters and builds a query to be run. 
+func parseURLParameters(r *http.Request) (Query, error) { filterMap := make(filterType) excludeMap := make(filterType) anyMap := make(filterType) - //TODO change query to be based on input. + groupBy := "" + reduce := "" - urlItems := r.URL.Query() + // parse params and body posts // (geo)json data + r.ParseForm() + + if SETTINGS.Get("debug") == "yes" { + for key, value := range r.Form { + fmt.Printf("F %s = %s\n", key, value) + } + } for k := range RegisterFuncMap { - parameter, parameterFound := urlItems[k] + parameter, parameterFound := r.Form[k] if parameterFound && parameter[0] != "" { - newSl := make([]string, len(parameter)) - for i, v := range parameter { - newSl[i] = decodeUrl(v) - } - //filterMap[k] = parameter - filterMap[k] = newSl + filterMap[k] = parameter } - parameter, parameterFound = urlItems["!"+k] + parameter, parameterFound = r.Form["!"+k] if parameterFound && parameter[0] != "" { excludeMap[k] = parameter } - parameter, parameterFound = urlItems["any_"+k] + parameter, parameterFound = r.Form["any_"+k] if parameterFound && parameter[0] != "" { anyMap[k] = parameter } } + // Check and validate groupby parameter + parameter, found := r.Form["groupby"] + if found && parameter[0] != "" { + _, funcFound1 := RegisterGroupBy[parameter[0]] + _, funcFound2 := RegisterGroupByCustom[parameter[0]] + if !funcFound1 && !funcFound2 { + return Query{}, errors.New("invalid groupby parameter") + } + groupBy = parameter[0] + } + + // Check and validate reduce parameter + parameter, found = r.Form["reduce"] + if found && parameter[0] != "" { + _, funcFound := RegisterReduce[parameter[0]] + if !funcFound { + return Query{}, errors.New("Invalid reduce parameter") + } + reduce = parameter[0] + } + // TODO there must be better way page := 1 - pageStr, pageGiven := urlItems["page"] + pageStr, pageGiven := r.Form["page"] if pageGiven { page = intMoreDefault(pageStr[0], 1) } pageSize := 10 - pageSizeStr, pageSizeGiven := urlItems["pagesize"] + pageSizeStr, pageSizeGiven 
:= r.Form["pagesize"] if pageSizeGiven { pageSize = intMoreDefault(pageSizeStr[0], 1) } limit := 0 - limitStr, limitGiven := urlItems["limit"] + limitStr, limitGiven := r.Form["limit"] if limitGiven { limit = intMoreDefault(limitStr[0], 1) } format := "json" - formatStr, formatGiven := urlItems["format"] + formatStr, formatGiven := r.Form["format"] if formatGiven { if formatStr[0] == "csv" { @@ -101,20 +184,37 @@ func parseURLParameters(r *http.Request) Query { } } - sortingL, sortingGiven := urlItems["sortby"] + sortingL, sortingGiven := r.Form["sortby"] index := "" - indexL, indexGiven := urlItems["search"] - indexGiven = indexGiven && (SETTINGS.Get("indexed") == "y") - indexUsed := indexGiven && len(indexL[0]) > 2 + indexL, indexGiven := r.Form["search"] + indexUsed := indexGiven && indexL[0] != "" + if indexUsed { - index = strings.ToLower(indexL[0]) + index = indexL[0] + } + + // check for geojson geometry stuff. + geometryS, geometryGiven := r.Form["geojson"] + var geoinput geojson.Geometry + if geometryGiven && geometryS[0] != "" { + err := json.Unmarshal([]byte(geometryS[0]), &geoinput) + if err != nil { + fmt.Println("parsing geojson error") + fmt.Println(err) + geometryGiven = false + return Query{}, errors.New("failed to parse geojson") + } } + return Query{ Filters: filterMap, Excludes: excludeMap, Anys: anyMap, + GroupBy: groupBy, + Reduce: reduce, + Limit: limit, LimitGiven: limitGiven, @@ -129,19 +229,34 @@ func parseURLParameters(r *http.Request) Query { IndexQuery: index, IndexGiven: indexUsed, + Geometry: geoinput.Geometry, + + GeometryGiven: geometryGiven, + ReturnFormat: format, - } + }, nil } -func groupByRunner(items Items, groubByParameter string) ItemsGroupedBy { +func groupByRunner(items Items, groupByParameter string) ItemsGroupedBy { grouping := make(ItemsGroupedBy) - groupingFunc := RegisterGroupBy[groubByParameter] - if groupingFunc == nil { + groupingFunc := RegisterGroupBy[groupByParameter] + + customGrouping := 
RegisterGroupByCustom[groupByParameter] + + if groupingFunc == nil && customGrouping == nil { return grouping } + + lock.RLock() + defer lock.RUnlock() + for _, item := range items { - GroupingKey := groupingFunc(item) - grouping[GroupingKey] = append(grouping[GroupingKey], item) + if customGrouping == nil { + GroupingKey := groupingFunc(item) + grouping[GroupingKey] = append(grouping[GroupingKey], item) + } else { + customGrouping(item, grouping) + } } return grouping } @@ -211,9 +326,10 @@ func max(a, b int) int { return b } -func filteredEarlyExit(items Items, operations GroupedOperations, query Query) Items { +func filteredEarlyExit(items *labeledItems, operations GroupedOperations, query Query) Items { + registerFuncs := operations.Funcs - filteredItems := make(Items, 0, len(items)/4) + filteredItems := make(Items, 0, len(*items)/4) excludes := query.Excludes filters := query.Filters anys := query.Anys @@ -226,8 +342,11 @@ func filteredEarlyExit(items Items, operations GroupedOperations, query Query) I stop = limit } - //TODO candidate for speedup - for _, item := range items { + lock.RLock() + defer lock.RUnlock() + + for i := range *items { + item := (*items)[i] if !any(item, anys, registerFuncs) { continue } @@ -243,10 +362,11 @@ func filteredEarlyExit(items Items, operations GroupedOperations, query Query) I break } } + return filteredItems } -func filteredEarlyExitSingle(items Items, column string, operations GroupedOperations, query Query) []string { +func filteredEarlyExitSingle(items *labeledItems, column string, operations GroupedOperations, query Query) []string { registerFuncs := operations.Funcs filteredItemsSet := make(map[string]bool) excludes := query.Excludes @@ -261,8 +381,12 @@ func filteredEarlyExitSingle(items Items, column string, operations GroupedOpera stop = limit } - //TODO candidate for speedup - for _, item := range items { + lock.RLock() + defer lock.RUnlock() + + // TODO candidate for speedup + + for _, item := range *items { if 
!any(item, anys, registerFuncs) { continue } @@ -272,8 +396,15 @@ func filteredEarlyExitSingle(items Items, column string, operations GroupedOpera if !exclude(item, excludes, registerFuncs) { continue } - single := operations.Getters[column](item) - filteredItemsSet[single] = true + + // return single example value for search field + if f, ok := operations.Getters[column]; ok { + single := f(item) + filteredItemsSet[single] = true + } else { + fmt.Println(column) + fmt.Println("missing getter?") + } if len(filteredItemsSet) == stop { break @@ -291,80 +422,126 @@ func filteredEarlyExitSingle(items Items, column string, operations GroupedOpera return results } -func runQuery(items Items, query Query, operations GroupedOperations) (Items, int64) { +// bit Array Filter. +// for columns with not so unique values it makes sense te create bitarrays. +// to do fast bitwise operations. +func bitArrayFilter( + items *labeledItems, + operations GroupedOperations, + query Query) (labeledItems, error) { + + balock.RLock() + defer balock.RUnlock() + + lock.RLock() + defer lock.RUnlock() + + combinedBitArrays := make([]bitarray.BitArray, 0) + + for k := range operations.BitArrays { + parameter, foundkey := query.Filters["match-"+k] + + if len(parameter) == 0 { + continue + } + if !foundkey { + continue + } + ba, err := operations.BitArrays[k](parameter[0]) + if err != nil { + continue + } + combinedBitArrays = append(combinedBitArrays, ba) + + } + + var bitArrayResult bitarray.BitArray + + if len(combinedBitArrays) > 0 { + bitArrayResult = combinedBitArrays[0] + } else { + log.Println("no bitarrays found") + return nil, errors.New("no bitarray found") + } + + // combine AND bitarrays + if len(combinedBitArrays) > 1 { + for i := range combinedBitArrays[1:] { + bitArrayResult = bitArrayResult.And(combinedBitArrays[i]) + } + } + + // TODO OR + // TODO EXCLUDE + + if bitArrayResult == nil { + log.Fatal("something went wrong with bitarray..") + } + + newItems := make(labeledItems, 0) 
+ labels := bitArrayResult.ToNums() + + for _, l := range labels { + newItems = append(newItems, (*items)[int(l)]) + } + + return newItems, nil +} + +func runQuery(items *labeledItems, query Query, operations GroupedOperations) (Items, int64) { start := time.Now() var newItems Items - //TODO this still needs a cleanup, but it's currently the solution to solve column and the indexes + if query.GeometryGiven { + cu := CoverDefault(query.Geometry) + if len(cu) == 0 { + fmt.Println("covering cell union not created") + } else { + geoitems := SearchGeoItems(cu) + items = &geoitems + fmt.Println(len(geoitems)) + } + } - //if query.IndexGiven && len(STR_INDEX) > 0 { - // items = make(Items, 0) - // indices := INDEX.Lookup([]byte(query.IndexQuery), -1) - // seen := make(map[string]bool) - // for _, idx := range indices { - // key := getStringFromIndex(STR_INDEX, idx) - // if !seen[key] { - // seen[key] = true - // for _, item := range LOOKUP[key] { - // items = append(items, item) - // } - // } + var nextItems *labeledItems + filteredItems, err := bitArrayFilter(items, operations, query) - // } - //} - if query.IndexGiven { - items = runIndexQuery(query) + if err != nil { + nextItems = items + } else { + nextItems = &filteredItems } if query.EarlyExit() { - newItems = filteredEarlyExit(items, operations, query) + newItems = filteredEarlyExit(nextItems, operations, query) } else { - newItems = filtered(items, operations, query) + newItems = filtered(nextItems, operations, query) } + diff := time.Since(start) return newItems, int64(diff) / int64(1000000) } -func runTypeAheadQuery(items Items, column string, query Query, operations GroupedOperations) ([]string, int64) { +func runTypeAheadQuery( + items *labeledItems, column string, query Query, + operations GroupedOperations) ([]string, int64) { start := time.Now() - if query.IndexGiven { - items = runIndexQuery(query) - } results := filteredEarlyExitSingle(items, column, operations, query) diff := time.Since(start) return 
results, int64(diff) / int64(1000000) } -func runIndexQuery(query Query) Items { - items := make(Items, 0) - indices := INDEX.Lookup([]byte(query.IndexQuery), -1) - seen := make(map[string]bool) - added := make(map[int]bool) - for _, idx := range indices { - key := getStringFromIndex(STR_INDEX, idx) - if !seen[key] { - seen[key] = true - for _, index := range LOOKUPINDEX[key] { - if _, ok := added[index]; !ok { - added[index] = true - items = append(items, ITEMS[index]) - } - - } - } - - } - return items -} - -func filtered(items Items, operations GroupedOperations, query Query) Items { +func filtered(items *labeledItems, operations GroupedOperations, query Query) Items { registerFuncs := operations.Funcs + filteredItems := make(Items, 0) excludes := query.Excludes filters := query.Filters anys := query.Anys - filteredItems := make(Items, 0) - for _, item := range items { + lock.RLock() + defer lock.RUnlock() + + for _, item := range *items { if !any(item, anys, registerFuncs) { continue } @@ -402,6 +579,7 @@ func getHeaderData(items Items, query Query, queryDuration int64) HeaderData { headerData["Total-Pages"] = strconv.Itoa((len(items) / query.PageSize) + 1) } + headerData["Cache-Control"] = "public, max-age=300" headerData["Total-Items"] = strconv.Itoa(len(items)) headerData["Query-Duration"] = strconv.FormatInt(queryDuration, 10) + "ms" bytesQuery, _ := json.Marshal(query) @@ -456,7 +634,6 @@ func sortLimit(items Items, query Query) Items { } // Note the slice built on array, slicing a slice larger then the the slice adds array items - // https://play.golang.org/p/GxhbBGNaXwL if len(items) < query.Limit { return items } From 69f6ade99a82561e8642141c13dcdf865b5cb2a9 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Wed, 28 Oct 2020 22:56:30 +0100 Subject: [PATCH 02/54] enable mistaken disabled code --- operations.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/operations.go b/operations.go index 0a72a17..13f0cf9 100644 --- 
a/operations.go +++ b/operations.go @@ -512,6 +512,20 @@ func runQuery(items *labeledItems, query Query, operations GroupedOperations) (I nextItems = &filteredItems } + if query.IndexGiven && len(STR_INDEX) > 0 { + items = make(Items, 0) + indices := INDEX.Lookup([]byte(query.IndexQuery), -1) + seen := make(map[string]bool) + for _, idx := range indices { + key := getStringFromIndex(STR_INDEX, idx) + if !seen[key] { + seen[key] = true + items = append(items, LOOKUP[key]...) + } + + } + } + if query.EarlyExit() { newItems = filteredEarlyExit(nextItems, operations, query) } else { From d7bdcab4b5291d05e0b96da2dab166f02765d54a Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Wed, 28 Oct 2020 23:02:39 +0100 Subject: [PATCH 03/54] update readme --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 3b7be3a..575b209 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # LambdaDB In memory database that uses filters to get the data you need. -Can be used for your needs by changing the models.go file to your needs. +Can be used for your needs by changing the `models.go` file to your needs. Creating and registering of the functionality that is needed. @@ -9,9 +9,9 @@ Creating and registering of the functionality that is needed. You can start the database with only a csv. Go over steps below, And see the result in your browser. 1. place csv file, in dir extras. -2. `python3 create_model.py > ../model.go` -3. cd ../ -4. go fmt +2. `python3 create_model_v2.py` answer the questions.. +3. go fmt model.go +4. mv model.go ../ 5. go build 6. ./lambda --help 7. 
./lambda --csv assets/items.csv or `python3 ingestion.py -b 1000` From a34870af6e4e9d994c7ef5d5c2a73a186af83af2 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Mon, 2 Nov 2020 14:02:48 +0100 Subject: [PATCH 04/54] add column validation --- extras/templates/model.template.jinja2 | 15 +++++++++++++++ http_handlers.go | 7 ++++--- operations.go | 3 +-- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/extras/templates/model.template.jinja2 b/extras/templates/model.template.jinja2 index 8e75ea8..29cc1f3 100644 --- a/extras/templates/model.template.jinja2 +++ b/extras/templates/model.template.jinja2 @@ -159,6 +159,19 @@ var RegisterGroupBy registerGroupByFunc var RegisterGetters registerGettersMap var RegisterReduce registerReduce +// ValidateRegsiters validate exposed columns do match filter names +func validateRegisters() { + var i = Item{} + var filters = []string{"match", "contains", "startswith"} + for _, c := range i.Columns() { + for _, f := range filters { + if _, ok := RegisterFuncMap[c+"-"+f]; !ok { + log.Fatal(c + " is missing in RegisterMap") + } + } + } +} + func init() { RegisterFuncMap = make(registerFuncType) @@ -173,6 +186,8 @@ func init() { {{registerFilters}} + validateRegisters() + /* RegisterFuncMap["match-ekey"] = FilterEkeyMatch RegisterFuncMap["contains-ekey"] = FilterEkeyContains diff --git a/http_handlers.go b/http_handlers.go index 5ccef53..be83126 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -2,7 +2,6 @@ package main import ( "encoding/csv" - "encoding/json" "fmt" "index/suffixarray" "log" @@ -27,7 +26,6 @@ func setHeader(items Items, w http.ResponseWriter, query Query, queryTime int64) w.Header().Set("Content-Disposition", "attachment; filename=\"items.csv\"") w.Header().Set("Content-Type", "text/csv; charset=utf-8") } else { - w.Header().Set("Content-Type", "application/json") } @@ -247,6 +245,10 @@ func handleInputStorage(r *http.Request) (string, storageFunc, retrieveFunc, str if !found { storagename := 
SETTINGS.Get("STORAGEMETHOD") storagefunc = STORAGEFUNCS[storagename] + + s, err := ioutil.ReadAll(fz) + if err != nil { + return } retrievefunc, found := RETRIEVEFUNCS[storagename] @@ -431,7 +433,6 @@ func contextTypeAheadRest(JWTConig jwtConfig, itemChan ItemsChannel, operations column = column[:len(column)-1] } if _, ok := operations.Getters[column]; !ok { - w.Write([]byte("500 wrong column name")) w.WriteHeader(404) w.Write([]byte("column is not found")) return diff --git a/operations.go b/operations.go index 13f0cf9..266ba78 100644 --- a/operations.go +++ b/operations.go @@ -345,8 +345,7 @@ func filteredEarlyExit(items *labeledItems, operations GroupedOperations, query lock.RLock() defer lock.RUnlock() - for i := range *items { - item := (*items)[i] + for _, item := range items { if !any(item, anys, registerFuncs) { continue } From 841bf493162d4d734c5c56a54a4ac621bddebb77 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Thu, 12 Nov 2020 15:09:42 +0100 Subject: [PATCH 05/54] reduce count was not registered fixed in template --- extras/templates/model.template.jinja2 | 3 ++- main.go | 5 +++++ model.go | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/extras/templates/model.template.jinja2 b/extras/templates/model.template.jinja2 index 29cc1f3..7916034 100644 --- a/extras/templates/model.template.jinja2 +++ b/extras/templates/model.template.jinja2 @@ -198,9 +198,10 @@ func init() { // register groupby RegisterGroupBy["ekey"] = GettersEkey + */ + // register reduce functions RegisterReduce["count"] = reduceCount - */ } type sortLookup map[string]func(int, int) bool diff --git a/main.go b/main.go index 0aed0a8..ce29c6f 100644 --- a/main.go +++ b/main.go @@ -79,6 +79,7 @@ func main() { SETTINGS.Set("null-delimiter", "\\N", "null delimiter") SETTINGS.Set("delimiter", ",", "delimiter") +<<<<<<< HEAD SETTINGS.Set("mgmt", "y", "enable the management api's for lambdadb") SETTINGS.Set("debug", "n", "Add memory debug information during run") @@ -88,6 
+89,10 @@ func main() { SETTINGS.Set("prometheus-monitoring", "n", "add promethues monitoring endpoint") SETTINGS.Set("STORAGEMETHOD", "bytes", "Storagemethod available options are json, jsonz, bytes, bytesz") SETTINGS.Set("LOADATSTARTUP", "n", "Load data at startup. ('y', 'n')") + + SETTINGS.Set("readonly", "yes", "only allow read only funcions") + SETTINGS.Set("debug", "no", "print memory usage") + SETTINGS.Parse() //Construct yes or no to booleans in SETTINGS diff --git a/model.go b/model.go index e2e53d2..b1713fd 100644 --- a/model.go +++ b/model.go @@ -1642,6 +1642,7 @@ func init() { RegisterReduce = make(registerReduce) RegisterBitArray = make(registerBitArray) + RegisterReduce["count"] = reduceCount // register search filter. //RegisterFuncMap["search"] = 'EDITYOURSELF' // example RegisterFuncMap["search"] = FilterEkeyStartsWith From aaa23b044b5131a81f37bbfb7405c05d35b270d2 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Thu, 19 Nov 2020 11:48:26 +0100 Subject: [PATCH 06/54] fix validation --- main.go | 2 +- model.go | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/main.go b/main.go index ce29c6f..9fd3f29 100644 --- a/main.go +++ b/main.go @@ -63,7 +63,7 @@ func loadcsv(itemChan ItemsChannel) { SETTINGS.Get("delimiter"), SETTINGS.Get("null-delimiter")) if err != nil { - log.Fatal(err) + log.Fatalln(err) } makeIndex() } diff --git a/model.go b/model.go index b1713fd..e2e53d2 100644 --- a/model.go +++ b/model.go @@ -1642,7 +1642,6 @@ func init() { RegisterReduce = make(registerReduce) RegisterBitArray = make(registerBitArray) - RegisterReduce["count"] = reduceCount // register search filter. 
//RegisterFuncMap["search"] = 'EDITYOURSELF' // example RegisterFuncMap["search"] = FilterEkeyStartsWith From a1bd93344918ff6ee613aae470db0b6d0a8f8701 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Thu, 19 Nov 2020 14:22:40 +0100 Subject: [PATCH 07/54] start work on geo s2 stuff --- geo.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 geo.go diff --git a/geo.go b/geo.go new file mode 100644 index 0000000..3847d76 --- /dev/null +++ b/geo.go @@ -0,0 +1,22 @@ +package main + +import ( + "github.com/go-spatial/geom/encoding/wkt" + "github.com/golang/geo/s2" + "strings" +) + +var sidx = s2.NewShapeIndex() + +func buildGeoIndex() { + + for _, v := range ITEMS { + addItem(v) + } + +} + +func addItem(i *Item) { + sreader := strings.NewReader(i.geom) + geometry, err := wkt.Decode(sreader) +} From 622f46ef7dd0493db9c90f697393fdcabde60b24 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Fri, 27 Nov 2020 14:45:22 +0100 Subject: [PATCH 08/54] wip s2 test --- geo.go | 87 ++++++++++++++++++++++++++++++++++++++++++++---- http_handlers.go | 6 ++++ main.go | 1 - operations.go | 1 - 4 files changed, 87 insertions(+), 8 deletions(-) diff --git a/geo.go b/geo.go index 3847d76..6602e4e 100644 --- a/geo.go +++ b/geo.go @@ -1,22 +1,97 @@ +/* + + Determine S2 cells involved in geometries. 
+ + inspired by + "github.com/akhenakh/oureadb/index/geodata" + "github.com/akhenakh/oureadb/store" + +*/ package main import ( + "fmt" + "github.com/go-spatial/geom" "github.com/go-spatial/geom/encoding/wkt" "github.com/golang/geo/s2" "strings" ) +var minLevel int +var maxLevel int +var maxCells int + +func init() { + minLevel = 19 + maxLevel = 19 + maxCells = 1 +} + var sidx = s2.NewShapeIndex() -func buildGeoIndex() { +func BuildGeoIndex() { - for _, v := range ITEMS { - addItem(v) + for i, v := range ITEMS { + v.GeoIndex(i) } } -func addItem(i *Item) { - sreader := strings.NewReader(i.geom) - geometry, err := wkt.Decode(sreader) +func (i Item) GeoIndex(idx int) error { + sreader := strings.NewReader(i.Point) + g, err := wkt.Decode(sreader) + if err != nil { + fmt.Printf("error encountered with %s", i.Point) + } + p, err := geom.GetCoordinates(g) + + if err != nil { + fmt.Printf("error encountered with %s", i.Point) + } + + fmt.Println(p) + + x := p[0][0] + y := p[0][1] + center := s2.PointFromLatLng(s2.LatLngFromDegrees(x, y)) + cap := s2.CapFromCenterArea(center, s2RadialAreaMeters(2)) + + coverer := &s2.RegionCoverer{MinLevel: minLevel, MaxLevel: maxLevel, MaxCells: maxCells} + cu := coverer.Covering(cap) + + // no cover for this geo object this is probably an error + if len(cu) == 0 { + fmt.Printf("geo object can't be indexed, empty cover") + } + return nil + +} + +//CalculateCover calculate S2 covering from given user polygon. 
+func CalculateCover(geom string) { + +} + +// GeoIdsAtCells returns all GeoData keys contained in the cells, without duplicates +func (idx *S2FlatIdx) GeoIdsAtCells(cells []s2.CellID) ([]GeoID, error) { + m := make(map[string]struct{}) + + for _, c := range cells { + ids, err := idx.GeoIdsAtCell(c) + if err != nil { + return nil, errors.Wrap(err, "fetching geo ids from cells failed") + } + for _, id := range ids { + m[string(id)] = struct{}{} + } + } + + res := make([]GeoID, len(m)) + var i int + for k := range m { + res[i] = []byte(k) + i++ + } + + return res, nil } diff --git a/http_handlers.go b/http_handlers.go index be83126..cff7df8 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -245,6 +245,7 @@ func handleInputStorage(r *http.Request) (string, storageFunc, retrieveFunc, str if !found { storagename := SETTINGS.Get("STORAGEMETHOD") storagefunc = STORAGEFUNCS[storagename] + } s, err := ioutil.ReadAll(fz) if err != nil { @@ -258,6 +259,11 @@ func handleInputStorage(r *http.Request) (string, storageFunc, retrieveFunc, str } filename := fmt.Sprintf("%s.%s", FILENAME, storagename) + + msg := fmt.Sprint("Loaded new items in memory amount: ", len(ITEMS)) + fmt.Printf(WarningColorN, msg) + //makeIndex() + BuildGeoIndex() return storagename, storagefunc, retrievefunc, filename } diff --git a/main.go b/main.go index 9fd3f29..1d69574 100644 --- a/main.go +++ b/main.go @@ -79,7 +79,6 @@ func main() { SETTINGS.Set("null-delimiter", "\\N", "null delimiter") SETTINGS.Set("delimiter", ",", "delimiter") -<<<<<<< HEAD SETTINGS.Set("mgmt", "y", "enable the management api's for lambdadb") SETTINGS.Set("debug", "n", "Add memory debug information during run") diff --git a/operations.go b/operations.go index 266ba78..0dac2b8 100644 --- a/operations.go +++ b/operations.go @@ -521,7 +521,6 @@ func runQuery(items *labeledItems, query Query, operations GroupedOperations) (I seen[key] = true items = append(items, LOOKUP[key]...) 
} - } } From 9512c743ed1cdef1bad81a0423141c209dc27449 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Thu, 3 Dec 2020 12:47:15 +0100 Subject: [PATCH 09/54] first working geo-selection --- geo.go | 81 ++++++++++++++++++------ geom_s2cover.go | 156 +++++++++++++++++++++++++++++++++++++++++++++++ http_handlers.go | 7 ++- main.go | 6 +- operations.go | 31 +++++++++- 5 files changed, 258 insertions(+), 23 deletions(-) create mode 100644 geom_s2cover.go diff --git a/geo.go b/geo.go index 6602e4e..18bc70c 100644 --- a/geo.go +++ b/geo.go @@ -6,7 +6,11 @@ "github.com/akhenakh/oureadb/index/geodata" "github.com/akhenakh/oureadb/store" + With S2 CillID's we can find which items are contained in given + filter geometry. + */ + package main import ( @@ -21,24 +25,33 @@ var minLevel int var maxLevel int var maxCells int +var geoIndex s2.CellIndex + +type s2Cells []s2.Cell + +var S2CELLS s2Cells + func init() { - minLevel = 19 - maxLevel = 19 - maxCells = 1 -} + minLevel = 7 + maxLevel = 20 + maxCells = 50 -var sidx = s2.NewShapeIndex() + //not used for now. + geoIndex = s2.CellIndex{} +} func BuildGeoIndex() { for i, v := range ITEMS { - v.GeoIndex(i) + v.GeoIndex(int32(i)) } + //geoIndex.Build() } -func (i Item) GeoIndex(idx int) error { - sreader := strings.NewReader(i.Point) +//GeoIndex for each items determine S2Cell and store it. 
+func (i Item) GeoIndex(idx int32) error { + sreader := strings.NewReader(i.GetGeometry()) g, err := wkt.Decode(sreader) if err != nil { fmt.Printf("error encountered with %s", i.Point) @@ -49,20 +62,13 @@ func (i Item) GeoIndex(idx int) error { fmt.Printf("error encountered with %s", i.Point) } - fmt.Println(p) - - x := p[0][0] - y := p[0][1] + y := p[0][0] + x := p[0][1] center := s2.PointFromLatLng(s2.LatLngFromDegrees(x, y)) - cap := s2.CapFromCenterArea(center, s2RadialAreaMeters(2)) + cell := s2.CellFromPoint(center) - coverer := &s2.RegionCoverer{MinLevel: minLevel, MaxLevel: maxLevel, MaxCells: maxCells} - cu := coverer.Covering(cap) + S2CELLS = append(S2CELLS, cell) - // no cover for this geo object this is probably an error - if len(cu) == 0 { - fmt.Printf("geo object can't be indexed, empty cover") - } return nil } @@ -72,8 +78,42 @@ func CalculateCover(geom string) { } +// Simple search algo +func SearchOverlapItems(items Items, cu s2.CellUnion) Items { + + cellUnion := make([]s2.Cell, 0) + + //Create S2cells from cell id. 
+ for _, c := range cu { + cell := s2.CellFromCellID(c) + cellUnion = append(cellUnion, cell) + } + + newItems := make(Items, 0) + + for idx, i := range items { + if SearchOverlap(idx, cellUnion) { + newItems = append(newItems, i) + } + } + + return newItems +} + +// SearchOverlap check if any cell of celluntion contains item points +func SearchOverlap(i int, cu []s2.Cell) bool { + + for _, c := range cu { + if c.ContainsCell(S2CELLS[i]) { + return true + } + } + return false +} + // GeoIdsAtCells returns all GeoData keys contained in the cells, without duplicates -func (idx *S2FlatIdx) GeoIdsAtCells(cells []s2.CellID) ([]GeoID, error) { +/* +func (idx *s2.S2FlatIdx) GeoIdsAtCells(cells []s2.CellID) ([]GeoID, error) { m := make(map[string]struct{}) for _, c := range cells { @@ -95,3 +135,4 @@ func (idx *S2FlatIdx) GeoIdsAtCells(cells []s2.CellID) ([]GeoID, error) { return res, nil } +*/ diff --git a/geom_s2cover.go b/geom_s2cover.go new file mode 100644 index 0000000..ff27076 --- /dev/null +++ b/geom_s2cover.go @@ -0,0 +1,156 @@ +/* + convert geojson to s2 cover + + derived from code found @ + github.com/akhenakh/oureadb +*/ + +package main + +import ( + "github.com/go-spatial/geom" + //"github.com/go-spatial/geom/encoding/geojson" + "fmt" + "github.com/golang/geo/s2" + "github.com/pkg/errors" +) + +//geoDataCoverCellUnion given geometry create an s2 cover for it +func geoDataCoverCellUnion(g geom.Geometry, coverer *s2.RegionCoverer, interior bool) (s2.CellUnion, error) { + + if geom.IsEmpty(g) { + fmt.Println("empty?") + return nil, errors.New("invalid geometry") + } + var cu s2.CellUnion + + switch gg := g.(type) { + case geom.Point: + points, _ := geom.GetCoordinates(gg) + c := s2.CellIDFromLatLng( + s2.LatLngFromDegrees(points[0][1], points[0][0]), + ) + cu = append(cu, c.Parent(coverer.MinLevel)) + + case geom.Polygon: + points, _ := geom.GetCoordinates(gg) + cup, err := coverPolygon(points, coverer, interior) + if err != nil { + return nil, errors.Wrap(err, 
"can't cover polygon") + } + cu = append(cu, cup...) + + case geom.MultiPolygon: + for _, p := range gg.Polygons() { + points, _ := geom.GetCoordinates(p) + cup, err := coverPolygon(points, coverer, interior) + if err != nil { + return nil, errors.Wrap(err, "can't cover multipolygon") + } + + cu = append(cu, cup...) + } + + case geom.LineString: + points, _ := geom.GetCoordinates(gg) + if len(points)%2 != 0 { + return nil, errors.New("invalid coordinates count for line") + } + + pl := make(s2.Polyline, len(points)) + for i := 0; i < len(points); i += 1 { + ll := s2.LatLngFromDegrees(points[i][1], points[i][0]) + pl[i] = s2.PointFromLatLng(ll) + } + + var cupl s2.CellUnion + if interior { + cupl = coverer.InteriorCellUnion(&pl) + } else { + cupl = coverer.CellUnion(&pl) + } + cu = append(cu, cupl...) + + default: + fmt.Println(gg) + return nil, errors.New("unsupported geojson data type") + } + + return cu, nil +} + +func CoverDefault(g geom.Geometry) s2.CellUnion { + + coverer := &s2.RegionCoverer{MinLevel: minLevel, MaxLevel: maxLevel, MaxCells: maxCells} + cu, err := Cover(g, coverer) + + // no cover for this geo object this is probably an error + if len(cu) == 0 || err != nil { + fmt.Println("geo object can't be indexed, empty cover") + fmt.Println(err) + } + return cu +} + +// Cover generates an s2 cover for GeoData gd +func Cover(g geom.Geometry, coverer *s2.RegionCoverer) (s2.CellUnion, error) { + return geoDataCoverCellUnion(g, coverer, false) +} + +// returns an s2 cover from a list of lng, lat forming a closed polygon +func coverPolygon(p []geom.Point, coverer *s2.RegionCoverer, interior bool) (s2.CellUnion, error) { + if len(p) < 3 { + return nil, errors.New("invalid polygons not enough coordinates for a closed polygon") + } + if len(p)%2 != 0 { + return nil, errors.New("invalid polygons odd coordinates number") + } + + l := LoopFromCoordinatesAndCCW(p, true) + if l.IsEmpty() || l.IsFull() { + return nil, errors.New("invalid polygons") + } + + // super 
hacky try reverse if ContainsOrigin + if l.ContainsOrigin() { + // reversing the slice + for i := len(p)/2 - 1; i >= 0; i-- { + opp := len(p) - 1 - i + p[i], p[opp] = p[opp], p[i] + } + } + + if interior { + return coverer.InteriorCovering(l), nil + } + return coverer.Covering(l), nil +} + +// LoopFromCoordinatesAndCCW creates a LoopFence from a list of lng lat +// if checkCCW is true also try to fix CCW +func LoopFromCoordinatesAndCCW(p []geom.Point, checkCCW bool) *s2.Loop { + if len(p)%2 != 0 || len(p) < 3 { + return nil + } + points := make([]s2.Point, len(p)) + + for i := 0; i < len(p); i += 1 { + points[i] = s2.PointFromLatLng(s2.LatLngFromDegrees(p[i][1], p[i][0])) + } + + if checkCCW && s2.RobustSign(points[0], points[1], points[2]) != s2.CounterClockwise { + // reversing the slice + for i := len(points)/2 - 1; i >= 0; i-- { + opp := len(points) - 1 - i + points[i], points[opp] = points[opp], points[i] + } + } + + if points[0] == points[len(points)-1] { + // remove last item if same as 1st + points = append(points[:len(points)-1], points[len(points)-1+1:]...) + } + + loop := s2.LoopFromPoints(points) + return loop +} diff --git a/http_handlers.go b/http_handlers.go index cff7df8..de0efd6 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -95,8 +95,13 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group } func ItemChanWorker(itemChan ItemsChannel) { + idx := 0 for items := range itemChan { - ITEMS = append(ITEMS, items...) 
+ for _, itm := range items { + ITEMS = append(ITEMS, itm) + itm.GeoIndex(int32(idx)) + idx += 1 + } } } diff --git a/main.go b/main.go index 1d69574..639a8ed 100644 --- a/main.go +++ b/main.go @@ -62,10 +62,14 @@ func loadcsv(itemChan ItemsChannel) { false, true, SETTINGS.Get("delimiter"), SETTINGS.Get("null-delimiter")) + if err != nil { log.Fatalln(err) } - makeIndex() + + // add timeout there is no garantee ItemsChannel + // is empty and you miss a few records + // makeIndex() } func main() { diff --git a/operations.go b/operations.go index 0dac2b8..19329d7 100644 --- a/operations.go +++ b/operations.go @@ -3,12 +3,16 @@ package main import ( "encoding/json" "fmt" + "github.com/go-spatial/geom" + "github.com/go-spatial/geom/encoding/geojson" "net/http" // "reflect" "errors" "log" + "net/url" "sort" + //"reflect" "strconv" "strings" "time" @@ -120,6 +124,20 @@ func parseURLParameters(r *http.Request) (Query, error) { } } + // we can post gejson data + r.ParseForm() + + if SETTINGS.Get("debug") == "yes" { + + for key, value := range r.Form { + fmt.Printf("%s = %s\n", key, value) + } + for key, value := range urlItems { + + fmt.Printf("%s = %s\n", key, value) + } + } + for k := range RegisterFuncMap { parameter, parameterFound := r.Form[k] if parameterFound && parameter[0] != "" { @@ -530,6 +548,17 @@ func runQuery(items *labeledItems, query Query, operations GroupedOperations) (I newItems = filtered(nextItems, operations, query) } + if query.GeometryGiven { + fmt.Println("woowoowoo") + cu := CoverDefault(query.Geometry) + fmt.Println(cu) + if len(cu) == 0 { + fmt.Println("covering cell union not created") + } else { + newItems = SearchOverlapItems(newItems, cu) + } + } + diff := time.Since(start) return newItems, int64(diff) / int64(1000000) } @@ -553,7 +582,7 @@ func filtered(items *labeledItems, operations GroupedOperations, query Query) It lock.RLock() defer lock.RUnlock() - for _, item := range *items { + for _, item := range items { if !any(item, anys, 
registerFuncs) { continue } From 613bdff6cad56a5d5c720ca34138b48a2ec40bc9 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Thu, 3 Dec 2020 14:17:39 +0100 Subject: [PATCH 10/54] working geo-selection --- geo.go | 31 ++++++++++++++++++++++--------- http_handlers.go | 3 +-- operations.go | 5 +++-- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/geo.go b/geo.go index 18bc70c..70cc195 100644 --- a/geo.go +++ b/geo.go @@ -19,15 +19,17 @@ import ( "github.com/go-spatial/geom/encoding/wkt" "github.com/golang/geo/s2" "strings" + "sync" ) var minLevel int var maxLevel int var maxCells int +var s2Lock = sync.RWMutex{} var geoIndex s2.CellIndex -type s2Cells []s2.Cell +type s2Cells map[int]s2.Cell var S2CELLS s2Cells @@ -38,36 +40,44 @@ func init() { //not used for now. geoIndex = s2.CellIndex{} + S2CELLS = make(s2Cells) } func BuildGeoIndex() { - for i, v := range ITEMS { - v.GeoIndex(int32(i)) + v.GeoIndex(i) } - //geoIndex.Build() } //GeoIndex for each items determine S2Cell and store it. 
-func (i Item) GeoIndex(idx int32) error { +func (i Item) GeoIndex(idx int) error { sreader := strings.NewReader(i.GetGeometry()) g, err := wkt.Decode(sreader) if err != nil { - fmt.Printf("error encountered with %s", i.Point) + fmt.Println(err.Error()) + fmt.Println(i.GetGeometry()) + fmt.Println(i.Ekey) + return fmt.Errorf("wkt error encountered with %s", i.Point) } - p, err := geom.GetCoordinates(g) + p, err := geom.GetCoordinates(g) if err != nil { - fmt.Printf("error encountered with %s", i.Point) + fmt.Println(err.Error()) + fmt.Println(i.Ekey) + fmt.Println(i.GetGeometry()) + fmt.Printf("geom error encountered with %s", i.Point) + return fmt.Errorf("geom error") } + s2Lock.Lock() + defer s2Lock.Unlock() y := p[0][0] x := p[0][1] center := s2.PointFromLatLng(s2.LatLngFromDegrees(x, y)) cell := s2.CellFromPoint(center) - S2CELLS = append(S2CELLS, cell) + S2CELLS[idx] = cell return nil @@ -103,6 +113,9 @@ func SearchOverlapItems(items Items, cu s2.CellUnion) Items { // SearchOverlap check if any cell of celluntion contains item points func SearchOverlap(i int, cu []s2.Cell) bool { + s2Lock.RLock() + defer s2Lock.RUnlock() + for _, c := range cu { if c.ContainsCell(S2CELLS[i]) { return true diff --git a/http_handlers.go b/http_handlers.go index de0efd6..4ae9467 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -82,7 +82,6 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group groupByItems = nil if len(result) == 0 { - w.WriteHeader(404) return } @@ -99,7 +98,7 @@ func ItemChanWorker(itemChan ItemsChannel) { for items := range itemChan { for _, itm := range items { ITEMS = append(ITEMS, itm) - itm.GeoIndex(int32(idx)) + itm.GeoIndex(idx) idx += 1 } } diff --git a/operations.go b/operations.go index 19329d7..6560cdd 100644 --- a/operations.go +++ b/operations.go @@ -112,9 +112,12 @@ func parseURLParameters(r *http.Request) (Query, error) { filterMap := make(filterType) excludeMap := make(filterType) anyMap := make(filterType) + 
groupBy := "" reduce := "" + //TODO change query to be based on input. + // parse params and body posts // (geo)json data r.ParseForm() @@ -549,9 +552,7 @@ func runQuery(items *labeledItems, query Query, operations GroupedOperations) (I } if query.GeometryGiven { - fmt.Println("woowoowoo") cu := CoverDefault(query.Geometry) - fmt.Println(cu) if len(cu) == 0 { fmt.Println("covering cell union not created") } else { From 8ab6bb8ace40adcdea3c689089baf6de22a2308b Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Thu, 3 Dec 2020 14:22:07 +0100 Subject: [PATCH 11/54] add curl test case for geo query --- curltest.sh | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100755 curltest.sh diff --git a/curltest.sh b/curltest.sh new file mode 100755 index 0000000..584d0b7 --- /dev/null +++ b/curltest.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +set -x +set -e +set -u + + +curl \ + --data-urlencode 'geojson={ + "type": "Polygon", + "coordinates": [ + [ + [4.902321, 52.428306], + [4.90127, 52.427024], + [4.905281, 52.426069], + [4.906782, 52.426226], + [4.906418, 52.427469], + [4.902321, 52.428306] + ] + ] + }' \ + 'http://127.0.0.1:8000/list/?groupby=postcode&reduce=count' From 93f801d3039004c8c7929141abd06df550b6da9b Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Mon, 7 Dec 2020 14:29:30 +0100 Subject: [PATCH 12/54] wip test with labels --- geo.go | 85 +++++++++++++++++++++++++++++++++++++++--------- http_handlers.go | 27 ++++++++++----- main.go | 7 ++-- operations.go | 17 +++++++--- 4 files changed, 104 insertions(+), 32 deletions(-) diff --git a/geo.go b/geo.go index 70cc195..d03af1e 100644 --- a/geo.go +++ b/geo.go @@ -1,13 +1,16 @@ /* - Determine S2 cells involved in geometries. + Determine S2 cells involved in geometries. Provide a fast way to lookup + data from based on a geojson query. 
inspired by "github.com/akhenakh/oureadb/index/geodata" "github.com/akhenakh/oureadb/store" - With S2 CillID's we can find which items are contained in given - filter geometry. + s2 cell index code. + + With S2 CillIDs we can find which items are contained in given + filter geometry (S2 cell union). */ @@ -18,6 +21,7 @@ import ( "github.com/go-spatial/geom" "github.com/go-spatial/geom/encoding/wkt" "github.com/golang/geo/s2" + "sort" "strings" "sync" ) @@ -27,31 +31,38 @@ var maxLevel int var maxCells int var s2Lock = sync.RWMutex{} -var geoIndex s2.CellIndex -type s2Cells map[int]s2.Cell +type cellIndexNode struct { + Cell s2.Cell + Label int +} + +type s2CellIndex []cellIndexNode +type s2CellMap map[int]s2.CellID -var S2CELLS s2Cells +var S2CELLS s2CellIndex +var S2CELLMAP s2CellMap func init() { minLevel = 7 maxLevel = 20 maxCells = 50 - //not used for now. - geoIndex = s2.CellIndex{} - S2CELLS = make(s2Cells) + S2CELLS = make(s2CellIndex, 100000) + S2CELLMAP = s2CellMap{} } func BuildGeoIndex() { for i, v := range ITEMS { v.GeoIndex(i) } - //geoIndex.Build() } //GeoIndex for each items determine S2Cell and store it. -func (i Item) GeoIndex(idx int) error { +func (i Item) GeoIndex(label int) error { + if i.GetGeometry() == "" { + return fmt.Errorf("missing wkt geometry") + } sreader := strings.NewReader(i.GetGeometry()) g, err := wkt.Decode(sreader) if err != nil { @@ -77,7 +88,9 @@ func (i Item) GeoIndex(idx int) error { center := s2.PointFromLatLng(s2.LatLngFromDegrees(x, y)) cell := s2.CellFromPoint(center) - S2CELLS[idx] = cell + cnode := cellIndexNode{Cell: cell, Label: label} + S2CELLS = append(S2CELLS, cnode) + S2CELLMAP[label] = cell.ID() return nil @@ -89,7 +102,30 @@ func CalculateCover(geom string) { } // Simple search algo -func SearchOverlapItems(items Items, cu s2.CellUnion) Items { +func SearchOverlapItems(items *labeledItems, cu s2.CellUnion) labeledItems { + + cellUnion := make([]s2.Cell, 0) + + // Create S2cells from cell id. 
+ for _, c := range cu { + cell := s2.CellFromCellID(c) + cellUnion = append(cellUnion, cell) + } + + newItems := labeledItems{} + + for k, i := range *items { + if cu.ContainsCellID(S2CELLMAP[k]) { + newItems[k] = i + } + } + + return newItems +} + +// Given only a cell Union return items +/* +func SearchRelevantItems(cu s2.CellUnion) Items { cellUnion := make([]s2.Cell, 0) @@ -101,14 +137,18 @@ func SearchOverlapItems(items Items, cu s2.CellUnion) Items { newItems := make(Items, 0) - for idx, i := range items { + min = S2CellS.Seek(cu.RectBound(). + + for idx, i := range S2CellS { if SearchOverlap(idx, cellUnion) { newItems = append(newItems, i) } } return newItems + } +*/ // SearchOverlap check if any cell of celluntion contains item points func SearchOverlap(i int, cu []s2.Cell) bool { @@ -117,7 +157,7 @@ func SearchOverlap(i int, cu []s2.Cell) bool { defer s2Lock.RUnlock() for _, c := range cu { - if c.ContainsCell(S2CELLS[i]) { + if c.ContainsCell(S2CELLS[i].Cell) { return true } } @@ -149,3 +189,18 @@ func (idx *s2.S2FlatIdx) GeoIdsAtCells(cells []s2.CellID) ([]GeoID, error) { return res, nil } */ + +//func (ca *s2Cell) + +// Seek position in index which is close to target +func (ci s2CellIndex) Seek(target s2.CellID) int { + pos := sort.Search(len(ci), func(i int) bool { + return ci[i].Cell.ID() > target + }) - 1 + + // Ensure we don't go beyond the beginning. 
+ if pos < 0 { + pos = 0 + } + return pos +} diff --git a/http_handlers.go b/http_handlers.go index 4ae9467..df16ad7 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -40,7 +40,7 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group return func(w http.ResponseWriter, r *http.Request) { query := parseURLParameters(r) - items, queryTime := runQuery(ITEMS, query, operations) + items, queryTime := runQuery(&ITEMS, query, operations) msg := fmt.Sprint("total: ", len(ITEMS), " hits: ", len(items), " time: ", queryTime, "ms ", "url: ", r.URL) fmt.Printf(NoticeColorN, msg) @@ -94,12 +94,12 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group } func ItemChanWorker(itemChan ItemsChannel) { - idx := 0 + label := 0 for items := range itemChan { for _, itm := range items { - ITEMS = append(ITEMS, itm) - itm.GeoIndex(idx) - idx += 1 + ITEMS[label] = itm + itm.GeoIndex(label) + label += 1 } } } @@ -138,6 +138,8 @@ func rmRest(w http.ResponseWriter, r *http.Request) { ITEMS = make(Items, 0, 100*1000) msg := fmt.Sprint("removed items from database") fmt.Printf(WarningColorN, msg) + ITEMS = labeledItems{} + go func() { time.Sleep(1 * time.Second) runtime.GC() @@ -262,6 +264,10 @@ func handleInputStorage(r *http.Request) (string, storageFunc, retrieveFunc, str retrievefunc = RETRIEVEFUNCS[storagename] } + // empty exising ITEMS + ITEMS = labeledItems{} + json.Unmarshal(s, &ITEMS) + filename := fmt.Sprintf("%s.%s", FILENAME, storagename) msg := fmt.Sprint("Loaded new items in memory amount: ", len(ITEMS)) @@ -409,7 +415,7 @@ func contextSearchRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Gro return func(w http.ResponseWriter, r *http.Request) { query := parseURLParameters(r) - items, queryTime := runQuery(ITEMS, query, operations) + items, queryTime := runQuery(&ITEMS, query, operations) if len(items) == 0 { w.WriteHeader(404) return @@ -448,7 +454,7 @@ func contextTypeAheadRest(JWTConig jwtConfig, 
itemChan ItemsChannel, operations return } - results, queryTime := runTypeAheadQuery(ITEMS, column, query, operations) + results, queryTime := runTypeAheadQuery(&ITEMS, column, query, operations) if len(results) == 0 { w.WriteHeader(404) return @@ -498,7 +504,12 @@ func helpRest(w http.ResponseWriter, r *http.Request) { registerReduces = append(registerReduces, k) } - _, registeredSortings := sortBy(ITEMS, []string{}) + newItems := make(Items, 10) + for i := 0; i < 10; i++ { + newItems = append(newItems, ITEMS[i]) + } + + _, registeredSortings := sortBy(newItems, []string{}) sort.Strings(registeredFilters) sort.Strings(registeredExcludes) diff --git a/main.go b/main.go index 639a8ed..314cf20 100644 --- a/main.go +++ b/main.go @@ -19,13 +19,14 @@ type formatRespFunc func(w http.ResponseWriter, r *http.Request, items Items) type registerFormatMap map[string]formatRespFunc //Items as Example +type labeledItems map[int]*Item type Items []*Item type ItemsFull []*ItemFull type ItemsGroupedBy map[string]Items type ItemsChannel chan Items -var ITEMS Items +var ITEMS labeledItems type jwtConfig struct { Enabled bool @@ -98,9 +99,7 @@ func main() { SETTINGS.Parse() - //Construct yes or no to booleans in SETTINGS - - ITEMS = make(Items, 0, 100*1000) + ITEMS = labeledItems{} Operations = GroupedOperations{Funcs: RegisterFuncMap, GroupBy: RegisterGroupBy, Getters: RegisterGetters, Reduce: RegisterReduce} itemChan := make(ItemsChannel, 1000) diff --git a/operations.go b/operations.go index 6560cdd..e166670 100644 --- a/operations.go +++ b/operations.go @@ -404,9 +404,7 @@ func filteredEarlyExitSingle(items *labeledItems, column string, operations Grou lock.RLock() defer lock.RUnlock() - // TODO candidate for speedup - - for _, item := range *items { + for _, item := range items { if !any(item, anys, registerFuncs) { continue } @@ -442,6 +440,7 @@ func filteredEarlyExitSingle(items *labeledItems, column string, operations Grou return results } +<<<<<<< HEAD // bit Array Filter. 
// for columns with not so unique values it makes sense te create bitarrays. // to do fast bitwise operations. @@ -556,10 +555,18 @@ func runQuery(items *labeledItems, query Query, operations GroupedOperations) (I if len(cu) == 0 { fmt.Println("covering cell union not created") } else { - newItems = SearchOverlapItems(newItems, cu) + geoitems := SearchOverlapItems(items, cu) + items = &geoitems + fmt.Println(len(geoitems)) } } + if query.EarlyExit() { + newItems = filteredEarlyExit(items, operations, query) + } else { + newItems = filtered(items, operations, query) + } + diff := time.Since(start) return newItems, int64(diff) / int64(1000000) } @@ -583,7 +590,7 @@ func filtered(items *labeledItems, operations GroupedOperations, query Query) It lock.RLock() defer lock.RUnlock() - for _, item := range items { + for _, item := range *items { if !any(item, anys, registerFuncs) { continue } From 63d3429a2012e0fbc16e9a951790d20c134664ca Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 8 Dec 2020 02:47:42 +0100 Subject: [PATCH 13/54] geosearch works --- curltest.sh | 2 +- geo.go | 123 ++++++++++++++++++----------------------------- http_handlers.go | 8 +-- main.go | 5 ++ operations.go | 1 + 5 files changed, 60 insertions(+), 79 deletions(-) diff --git a/curltest.sh b/curltest.sh index 584d0b7..fb0d54f 100755 --- a/curltest.sh +++ b/curltest.sh @@ -5,7 +5,7 @@ set -e set -u -curl \ +curl -vvv \ --data-urlencode 'geojson={ "type": "Polygon", "coordinates": [ diff --git a/geo.go b/geo.go index d03af1e..62528ce 100644 --- a/geo.go +++ b/geo.go @@ -33,13 +33,18 @@ var maxCells int var s2Lock = sync.RWMutex{} type cellIndexNode struct { - Cell s2.Cell + ID s2.CellID Label int } type s2CellIndex []cellIndexNode type s2CellMap map[int]s2.CellID +//implement Sort interface for s2CellIndex +func (c s2CellIndex) Len() int { return len(c) } +func (c s2CellIndex) Swap(i, j int) { c[i], c[j] = c[j], c[i] } +func (c s2CellIndex) Less(i, j int) bool { return c[i].ID < c[j].ID } + 
var S2CELLS s2CellIndex var S2CELLMAP s2CellMap @@ -48,7 +53,7 @@ func init() { maxLevel = 20 maxCells = 50 - S2CELLS = make(s2CellIndex, 100000) + S2CELLS = make(s2CellIndex, 0) S2CELLMAP = s2CellMap{} } @@ -58,6 +63,10 @@ func BuildGeoIndex() { } } +func (c cellIndexNode) IsEmpty() bool { + return c.ID == 0 +} + //GeoIndex for each items determine S2Cell and store it. func (i Item) GeoIndex(label int) error { if i.GetGeometry() == "" { @@ -80,37 +89,41 @@ func (i Item) GeoIndex(label int) error { fmt.Printf("geom error encountered with %s", i.Point) return fmt.Errorf("geom error") } + s2Lock.Lock() defer s2Lock.Unlock() y := p[0][0] x := p[0][1] - center := s2.PointFromLatLng(s2.LatLngFromDegrees(x, y)) + ll := s2.LatLngFromDegrees(x, y) + + if !ll.IsValid() { + fmt.Println(i.Ekey) + fmt.Println(i.GetGeometry()) + fmt.Printf("ll geom error encountered with %f %f", x, y) + return fmt.Errorf("geom error") + } + + center := s2.PointFromLatLng(ll) cell := s2.CellFromPoint(center) - cnode := cellIndexNode{Cell: cell, Label: label} + cnode := cellIndexNode{ID: cell.ID(), Label: label} S2CELLS = append(S2CELLS, cnode) S2CELLMAP[label] = cell.ID() - return nil - -} + if label%100000 == 0 { + S2CELLS.Sort() + } -//CalculateCover calculate S2 covering from given user polygon. -func CalculateCover(geom string) { + return nil } // Simple search algo func SearchOverlapItems(items *labeledItems, cu s2.CellUnion) labeledItems { - cellUnion := make([]s2.Cell, 0) - - // Create S2cells from cell id. - for _, c := range cu { - cell := s2.CellFromCellID(c) - cellUnion = append(cellUnion, cell) - } + s2Lock.RLock() + defer s2Lock.RUnlock() newItems := labeledItems{} @@ -119,83 +132,38 @@ func SearchOverlapItems(items *labeledItems, cu s2.CellUnion) labeledItems { newItems[k] = i } } - return newItems } -// Given only a cell Union return items -/* -func SearchRelevantItems(cu s2.CellUnion) Items { - - cellUnion := make([]s2.Cell, 0) - - //Create S2cells from cell id. 
- for _, c := range cu { - cell := s2.CellFromCellID(c) - cellUnion = append(cellUnion, cell) - } - - newItems := make(Items, 0) - - min = S2CellS.Seek(cu.RectBound(). - - for idx, i := range S2CellS { - if SearchOverlap(idx, cellUnion) { - newItems = append(newItems, i) - } - } - - return newItems - -} -*/ +// Given only a cell Union return labeldItems +func SearchGeoItems(cu s2.CellUnion) labeledItems { -// SearchOverlap check if any cell of celluntion contains item points -func SearchOverlap(i int, cu []s2.Cell) bool { + newItems := labeledItems{} - s2Lock.RLock() - defer s2Lock.RUnlock() + cu.Normalize() - for _, c := range cu { - if c.ContainsCell(S2CELLS[i].Cell) { - return true - } + for i, c := range cu { + fmt.Printf("%d %s \n", i, c) } - return false -} -// GeoIdsAtCells returns all GeoData keys contained in the cells, without duplicates -/* -func (idx *s2.S2FlatIdx) GeoIdsAtCells(cells []s2.CellID) ([]GeoID, error) { - m := make(map[string]struct{}) + min := S2CELLS.Seek(cu[0].ChildBegin()) + max := S2CELLS.Seek(cu[len(cu)-1].ChildEnd()) - for _, c := range cells { - ids, err := idx.GeoIdsAtCell(c) - if err != nil { - return nil, errors.Wrap(err, "fetching geo ids from cells failed") - } - for _, id := range ids { - m[string(id)] = struct{}{} + for _, i := range S2CELLS[min : max+1] { + if cu.ContainsCellID(i.ID) { + newItems[i.Label] = ITEMS[i.Label] } } - res := make([]GeoID, len(m)) - var i int - for k := range m { - res[i] = []byte(k) - i++ - } + return newItems - return res, nil } -*/ - -//func (ca *s2Cell) // Seek position in index which is close to target func (ci s2CellIndex) Seek(target s2.CellID) int { + pos := sort.Search(len(ci), func(i int) bool { - return ci[i].Cell.ID() > target + return ci[i].ID > target }) - 1 // Ensure we don't go beyond the beginning. @@ -204,3 +172,8 @@ func (ci s2CellIndex) Seek(target s2.CellID) int { } return pos } + +// Sort CellIndex so Binary search can work. 
+func (ci s2CellIndex) Sort() { + sort.Sort(ci) +} diff --git a/http_handlers.go b/http_handlers.go index df16ad7..5344667 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -97,9 +97,11 @@ func ItemChanWorker(itemChan ItemsChannel) { label := 0 for items := range itemChan { for _, itm := range items { - ITEMS[label] = itm - itm.GeoIndex(label) - label += 1 + if itm != nil { + ITEMS[label] = itm + itm.GeoIndex(label) + label++ + } } } } diff --git a/main.go b/main.go index 314cf20..381185d 100644 --- a/main.go +++ b/main.go @@ -7,6 +7,7 @@ import ( //"github.com/prometheus/client_golang/prometheus" //"github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/promhttp" + "time" ) type filterFuncc func(*Item, string) bool @@ -68,6 +69,10 @@ func loadcsv(itemChan ItemsChannel) { log.Fatalln(err) } + // make sure channels are empty + time.Sleep(1 * time.Second) + S2CELLS.Sort() + fmt.Println("Sorted") // add timeout there is no garantee ItemsChannel // is empty and you miss a few records // makeIndex() diff --git a/operations.go b/operations.go index e166670..803a7b4 100644 --- a/operations.go +++ b/operations.go @@ -517,6 +517,7 @@ func runQuery(items *labeledItems, query Query, operations GroupedOperations) (I fmt.Println("covering cell union not created") } else { geoitems := SearchGeoItems(cu) +<<<<<<< HEAD items = &geoitems fmt.Println(len(geoitems)) } From c370cef5d4a556a0d088160a9342ad93233bdc8d Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Wed, 9 Dec 2020 15:14:03 +0100 Subject: [PATCH 14/54] improve geojson handling --- geo.go | 14 ++++++++++---- geom_s2cover.go | 7 ++++++- http_handlers.go | 29 ++++++++++++++++++++++++++--- operations.go | 8 +++----- 4 files changed, 45 insertions(+), 13 deletions(-) diff --git a/geo.go b/geo.go index 62528ce..c040f9a 100644 --- a/geo.go +++ b/geo.go @@ -40,7 +40,7 @@ type cellIndexNode struct { type s2CellIndex []cellIndexNode type s2CellMap map[int]s2.CellID 
-//implement Sort interface for s2CellIndex +// Implement Sort interface for s2CellIndex func (c s2CellIndex) Len() int { return len(c) } func (c s2CellIndex) Swap(i, j int) { c[i], c[j] = c[j], c[i] } func (c s2CellIndex) Less(i, j int) bool { return c[i].ID < c[j].ID } @@ -49,9 +49,9 @@ var S2CELLS s2CellIndex var S2CELLMAP s2CellMap func init() { - minLevel = 7 - maxLevel = 20 - maxCells = 50 + minLevel = 2 + maxLevel = 21 + maxCells = 450 S2CELLS = make(s2CellIndex, 0) S2CELLMAP = s2CellMap{} @@ -74,6 +74,7 @@ func (i Item) GeoIndex(label int) error { } sreader := strings.NewReader(i.GetGeometry()) g, err := wkt.Decode(sreader) + if err != nil { fmt.Println(err.Error()) fmt.Println(i.GetGeometry()) @@ -111,6 +112,7 @@ func (i Item) GeoIndex(label int) error { S2CELLS = append(S2CELLS, cnode) S2CELLMAP[label] = cell.ID() + // Update index while loading data so queries already work if label%100000 == 0 { S2CELLS.Sort() } @@ -149,6 +151,10 @@ func SearchGeoItems(cu s2.CellUnion) labeledItems { min := S2CELLS.Seek(cu[0].ChildBegin()) max := S2CELLS.Seek(cu[len(cu)-1].ChildEnd()) + // ITEMS read lock + lock.RLock() + defer lock.RUnlock() + for _, i := range S2CELLS[min : max+1] { if cu.ContainsCellID(i.ID) { newItems[i.Label] = ITEMS[i.Label] diff --git a/geom_s2cover.go b/geom_s2cover.go index ff27076..203d8fb 100644 --- a/geom_s2cover.go +++ b/geom_s2cover.go @@ -103,7 +103,12 @@ func coverPolygon(p []geom.Point, coverer *s2.RegionCoverer, interior bool) (s2. 
return nil, errors.New("invalid polygons not enough coordinates for a closed polygon") } if len(p)%2 != 0 { - return nil, errors.New("invalid polygons odd coordinates number") + if p[0] == p[len(p)-1] { + //last element == first element + p = p[1:] + } else { + return nil, errors.New("invalid polygons odd coordinates number") + } } l := LoopFromCoordinatesAndCCW(p, true) diff --git a/http_handlers.go b/http_handlers.go index 5344667..c618056 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -36,9 +36,20 @@ func setHeader(items Items, w http.ResponseWriter, query Query, queryTime int64) } } +func hanleQueryError(err error, w http.ResponseWriter) { + response := make(map[string]string) + w.WriteHeader(500) + response["error"] = err.Error() + json.NewEncoder(w).Encode(response) +} + func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations GroupedOperations) func(http.ResponseWriter, *http.Request) { return func(w http.ResponseWriter, r *http.Request) { - query := parseURLParameters(r) + query, err := parseURLParameters(r) + if err != nil { + hanleQueryError(err, w) + return + } items, queryTime := runQuery(&ITEMS, query, operations) @@ -95,7 +106,9 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group func ItemChanWorker(itemChan ItemsChannel) { label := 0 + for items := range itemChan { + lock.Lock() for _, itm := range items { if itm != nil { ITEMS[label] = itm @@ -103,6 +116,7 @@ func ItemChanWorker(itemChan ItemsChannel) { label++ } } + lock.Unlock() } } @@ -415,7 +429,11 @@ func MIDDLEWARE(cors bool) func(http.Handler) http.Handler { func contextSearchRest(JWTConig jwtConfig, itemChan ItemsChannel, operations GroupedOperations) func(http.ResponseWriter, *http.Request) { return func(w http.ResponseWriter, r *http.Request) { - query := parseURLParameters(r) + query, err := parseURLParameters(r) + if err != nil { + hanleQueryError(err, w) + return + } items, queryTime := runQuery(&ITEMS, query, operations) if 
len(items) == 0 { @@ -445,7 +463,12 @@ func contextSearchRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Gro func contextTypeAheadRest(JWTConig jwtConfig, itemChan ItemsChannel, operations GroupedOperations) func(http.ResponseWriter, *http.Request) { return func(w http.ResponseWriter, r *http.Request) { - query := parseURLParameters(r) + query, err := parseURLParameters(r) + if err != nil { + hanleQueryError(err, w) + return + } + column := r.URL.Path[len("/typeahead/"):] if column[len(column)-1] == '/' { column = column[:len(column)-1] diff --git a/operations.go b/operations.go index 803a7b4..64041b8 100644 --- a/operations.go +++ b/operations.go @@ -13,6 +13,7 @@ import ( "net/url" "sort" //"reflect" + "errors" "strconv" "strings" "time" @@ -107,7 +108,6 @@ func (q Query) CacheKey() (string, error) { return strings.Join(keys, "-"), nil } -// parseURLParameters checks parameters and builds a query to be run. func parseURLParameters(r *http.Request) (Query, error) { filterMap := make(filterType) excludeMap := make(filterType) @@ -133,11 +133,11 @@ func parseURLParameters(r *http.Request) (Query, error) { if SETTINGS.Get("debug") == "yes" { for key, value := range r.Form { - fmt.Printf("%s = %s\n", key, value) + fmt.Printf("F %s = %s\n", key, value) } for key, value := range urlItems { - fmt.Printf("%s = %s\n", key, value) + fmt.Printf("P %s = %s\n", key, value) } } @@ -440,7 +440,6 @@ func filteredEarlyExitSingle(items *labeledItems, column string, operations Grou return results } -<<<<<<< HEAD // bit Array Filter. // for columns with not so unique values it makes sense te create bitarrays. // to do fast bitwise operations. 
@@ -517,7 +516,6 @@ func runQuery(items *labeledItems, query Query, operations GroupedOperations) (I fmt.Println("covering cell union not created") } else { geoitems := SearchGeoItems(cu) -<<<<<<< HEAD items = &geoitems fmt.Println(len(geoitems)) } From 156297379efff7a98e73f8d22371b18bc8cca4ad Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Mon, 14 Dec 2020 22:05:11 +0100 Subject: [PATCH 15/54] show http table --- Dockerfile | 2 +- main.go | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8ee75ab..c757752 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,7 @@ RUN apk update && apk add --no-cache git RUN apk --no-cache add ca-certificates WORKDIR /app -COPY . /app/ +COPY *.go /app/ # Fetch dependencies. RUN go get -d -v diff --git a/main.go b/main.go index 381185d..ae19c94 100644 --- a/main.go +++ b/main.go @@ -28,6 +28,7 @@ type ItemsGroupedBy map[string]Items type ItemsChannel chan Items var ITEMS labeledItems +var itemChan ItemsChannel type jwtConfig struct { Enabled bool @@ -54,9 +55,7 @@ const ( DebugColorN = "\033[0;36m%s\033[0m\n" ) -func init() { - -} +func init() {} func loadcsv(itemChan ItemsChannel) { log.Print("loading given csv") From 5059fef64e7f2a6a14c9bd19b85960a344365ca8 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 12 Jan 2021 13:31:21 +0100 Subject: [PATCH 16/54] keep lambda working for data without geometry --- extras/create_model_v2.py | 3 ++- extras/templates/model.template.jinja2 | 8 +++++++- geo.go | 7 ++----- main.go | 1 + 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/extras/create_model_v2.py b/extras/create_model_v2.py index a070240..5544fc1 100644 --- a/extras/create_model_v2.py +++ b/extras/create_model_v2.py @@ -197,7 +197,8 @@ def gocamelCase(string): columnFilters=''.join(columnFilters), registerFilters=''.join(registerFilters), sortColumns=''.join(sortColumns), - indexcolumn=allcolumns[index] + indexcolumn=allcolumns[index], + geometryGetter='""', 
) f = open('model.go', 'w') diff --git a/extras/templates/model.template.jinja2 b/extras/templates/model.template.jinja2 index 7916034..1812422 100644 --- a/extras/templates/model.template.jinja2 +++ b/extras/templates/model.template.jinja2 @@ -1,6 +1,7 @@ package main import ( + "log" "sort" "strconv" "strings" @@ -112,6 +113,11 @@ func (i Item) GetIndex()string{ return Getters{{indexcolumn}}(&i) } + +func (i Item) GetGeometry() string { + return {{geometryGetter}} +} + {{columnFilters}} /* @@ -165,7 +171,7 @@ func validateRegisters() { var filters = []string{"match", "contains", "startswith"} for _, c := range i.Columns() { for _, f := range filters { - if _, ok := RegisterFuncMap[c+"-"+f]; !ok { + if _, ok := RegisterFuncMap[f+"-"+c]; !ok { log.Fatal(c + " is missing in RegisterMap") } } diff --git a/geo.go b/geo.go index c040f9a..8875663 100644 --- a/geo.go +++ b/geo.go @@ -78,16 +78,14 @@ func (i Item) GeoIndex(label int) error { if err != nil { fmt.Println(err.Error()) fmt.Println(i.GetGeometry()) - fmt.Println(i.Ekey) - return fmt.Errorf("wkt error encountered with %s", i.Point) + return fmt.Errorf("wkt error encountered with %s", i.GetGeometry()) } p, err := geom.GetCoordinates(g) if err != nil { fmt.Println(err.Error()) - fmt.Println(i.Ekey) fmt.Println(i.GetGeometry()) - fmt.Printf("geom error encountered with %s", i.Point) + fmt.Printf("geom error encountered with %s", i.GetGeometry()) return fmt.Errorf("geom error") } @@ -99,7 +97,6 @@ func (i Item) GeoIndex(label int) error { ll := s2.LatLngFromDegrees(x, y) if !ll.IsValid() { - fmt.Println(i.Ekey) fmt.Println(i.GetGeometry()) fmt.Printf("ll geom error encountered with %f %f", x, y) return fmt.Errorf("geom error") diff --git a/main.go b/main.go index ae19c94..3e3b8c2 100644 --- a/main.go +++ b/main.go @@ -7,6 +7,7 @@ import ( //"github.com/prometheus/client_golang/prometheus" //"github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/promhttp" + 
"net/http" // "runtime/debug" "github.com/pkg/profile") "time" ) From 2ec0762f80f3f6118fdd6dd1a2f4beffee75ff8b Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Thu, 14 Jan 2021 16:05:45 +0100 Subject: [PATCH 17/54] create model now has ingore and geocolumn options --- csv.go | 8 +- extras/create_model_v2.py | 115 ++++++++++++++++++++----- extras/templates/model.template.jinja2 | 38 +++++--- http_handlers.go | 14 +-- main.go | 3 +- 5 files changed, 130 insertions(+), 48 deletions(-) diff --git a/csv.go b/csv.go index 431014c..fe1bea2 100644 --- a/csv.go +++ b/csv.go @@ -62,8 +62,8 @@ func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, items := Items{} for { - itemFull := ItemFull{} - columns := itemFull.Columns() + itemIn := ItemIn{} + columns := itemIn.Columns() cols := make([]interface{}, len(columns)) record, err := reader.Read() @@ -98,7 +98,7 @@ func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, // marschall it to bytes b, _ := json.Marshal(itemMap) // fill the new Item instance with values - if err := json.Unmarshal([]byte(b), &itemFull); err != nil { + if err := json.Unmarshal([]byte(b), &itemIn); err != nil { line := strings.Join(record, delimiter) failed++ @@ -115,7 +115,7 @@ func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, itemChan <- items items = Items{} } - smallItem := itemFull.Shrink() + smallItem := itemIn.Shrink() items = append(items, &smallItem) success++ } diff --git a/extras/create_model_v2.py b/extras/create_model_v2.py index 5544fc1..6cfe6ed 100644 --- a/extras/create_model_v2.py +++ b/extras/create_model_v2.py @@ -30,12 +30,17 @@ loader=FileSystemLoader('./templates'), ) -# keep track of all column names org are original names +# keep track of all column names and all original names in csv allcolumns = [] +allcolumns_org = [] repeated = [] repeated_org = [] unique = [] unique_org = [] +ignored = [] +ignored_org = [] +geocolumns = [] +geocolumns_org = [] 
def gocamelCase(string): @@ -50,26 +55,43 @@ def gocamelCase(string): # ask some questions about columns. index = 0 for k in row.keys(): + + # go camelcase column names kc = gocamelCase(k) + options = ['r', 'u', 'i', 'g'] while True: # keep asking for valid input - q1 = "a repeated value? has less then (2^16=65536) values? Y/n?" - yesno = input(f"idx:{index} is {k} {q1}") # noqa - if yesno == '': - yesno = 'y' - if yesno not in ['y', 'n']: + q1 = ("a (R)epeated value? has less then (2^16=65536) option. ", + "(U)nique, (G)eo lat/lon point OR (I)gnore ? r/u/g/i?") + action = input(f"idx:{index} is {k} {q1}") # noqa + if action == '': + print(f"pick one from {options}") + continue + if action not in options: continue break - if yesno == 'y': + if action == 'r': repeated.append(kc) repeated_org.append(k) - else: + elif action == 'u': + unique.append(kc) + unique_org.append(k) + elif action == 'i': + ignored.append(kc) + ignored_org.append(k) + elif action == 'g': + geocolumns.append(kc) + geocolumns_org.append(k) unique.append(kc) unique_org.append(k) + else: + print('invalid input') + sys.exit(-1) allcolumns.append(kc) + allcolumns_org.append(k) index += 1 # ask for a index column @@ -78,10 +100,17 @@ def gocamelCase(string): index = input(f"which column is idx? 
0 - {len(allcolumns) - 1} ") try: index = int(index) - if index < len(allcolumns): + + if allcolumns[index] in ignored: + print('Selected an ignored column for index') + raise ValueError + + if -1 < index < len(allcolumns): break + except ValueError: continue + print('try again..') # setup initial data structs for each repeated column @@ -92,15 +121,30 @@ def gocamelCase(string): initRepeatColumns.append(initColumntemplate.render(columnname=c)) # create ItemFull struct fields -columnsItemFull = [] +columnsItemIn = [] jsonColumn = env.get_template('itemFullColumn.jinja2') -for c1, c2 in zip(allcolumns, row.keys()): +for c1, c2 in zip(allcolumns, allcolumns_org): onerow = jsonColumn.render(c1=c1, c2=c2) - columnsItemFull.append(onerow) + columnsItemIn.append(onerow) + +# create ItemFull struct fields +columnsItemOut = [] +jsonColumn = env.get_template('itemFullColumn.jinja2') +for c1, c2 in zip(allcolumns, allcolumns_org): + + if c1 in ignored: + continue + + onerow = jsonColumn.render(c1=c1, c2=c2) + columnsItemOut.append(onerow) # create Item struct fields columnsItem = [] -for c1, c2 in zip(allcolumns, row.keys()): +for c1, c2 in zip(allcolumns, allcolumns_org): + + if c1 in ignored: + continue + onerow = f"\t{c1} string\n" if c1 in repeated: onerow = f"\t{c1} uint16\n" @@ -123,6 +167,10 @@ def gocamelCase(string): expandItemFields = [] for c in allcolumns: + + if c in ignored: + continue + if c in repeated: # string to unint shrinkItemFields.append(f"\t\t{c}IdxMap[i.{c}],\n") @@ -133,9 +181,18 @@ def gocamelCase(string): expandItemFields.append(f"\t\ti.{c},\n") -originalColumns = [] -for c in row.keys(): - originalColumns.append(f'\t\t"{c}",\n') +# ItemIn Columns +inColumns = [] +for c in allcolumns_org: + inColumns.append(f'\t\t"{c}",\n') + +# ItemOut Columns +outColumns = [] +for cc, c in zip(allcolumns, allcolumns_org): + # cc CamelCaseColumn. + if cc in ignored: + continue + outColumns.append(f'\t\t"{c}",\n') # create column filters. 
# match, startswith, contains etc @@ -144,6 +201,9 @@ def gocamelCase(string): filtertemplate = env.get_template("filters.jinja2") for c in allcolumns: + if c in ignored: + continue + lookup = f"i.{c}" if c in repeated: lookup = f"{c}[i.{c}]" @@ -154,7 +214,9 @@ def gocamelCase(string): registerFilters = [] rtempl = env.get_template('registerFilters.jinja2') # register filters -for c, co in zip(allcolumns, row.keys()): +for c, co in zip(allcolumns, allcolumns_org): + if c in ignored: + continue txt = rtempl.render(co=co, column=c) registerFilters.append(txt) @@ -163,7 +225,9 @@ def gocamelCase(string): sortTemplate = env.get_template('sortfunc.jinja2') # create sort functions -for co, c in zip(row.keys(), allcolumns): +for c, co in zip(allcolumns, allcolumns_org): + if c in ignored: + continue c1 = f"items[i].{c} < items[j].{c}" c2 = f"items[i].{c} > items[j].{c}" @@ -177,28 +241,35 @@ def gocamelCase(string): csv_columns = [] -for c in row.keys(): +for c in allcolumns: csv_columns.append(f'\t"{c}",\n') # Finally render the model.go template modeltemplate = env.get_template('model.template.jinja2') +geometryGetter = '""' +print('GEOCOLUMNS: ' + " ".join(geocolumns)) +if len(geocolumns) == 1: + geometryGetter = f"Getters{geocolumns[0]}(&i)" + output = modeltemplate.render( initRepeatColumns=''.join(initRepeatColumns), - columnsItemFull=''.join(columnsItemFull), + columnsItemIn=''.join(columnsItemIn), + columnsItemOut=''.join(columnsItemOut), columnsItem=''.join(columnsItem), shrinkVars=''.join(shrinkVars), shrinkItems=''.join(shrinkItems), shrinkItemFields=''.join(shrinkItemFields), expandItemFields=''.join(expandItemFields), csv_columns=''.join(csv_columns), - originalColumns=''.join(originalColumns), + inColumns=''.join(inColumns), + outColumns=''.join(outColumns), columnFilters=''.join(columnFilters), registerFilters=''.join(registerFilters), sortColumns=''.join(sortColumns), indexcolumn=allcolumns[index], - geometryGetter='""', + geometryGetter=geometryGetter, ) 
f = open('model.go', 'w') diff --git a/extras/templates/model.template.jinja2 b/extras/templates/model.template.jinja2 index 1812422..5b86764 100644 --- a/extras/templates/model.template.jinja2 +++ b/extras/templates/model.template.jinja2 @@ -43,27 +43,28 @@ func init() { {{itemStructs}} -type ItemFull struct { +type ItemIn struct { -{{columnsItemFull}} +{{columnsItemIn}} } -type Item struct { +type ItemOut struct { -{{columnsItem}} +{{columnsItemOut}} } -func (i Item) Columns() []string { - return []string{ - {{csv_columns}} - } +type Item struct { + +{{columnsItem}} + } + // Shrink create smaller Item using uint16 -func (i ItemFull) Shrink() Item { +func (i ItemIn) Shrink() Item { lock.Lock() defer lock.Unlock() @@ -77,26 +78,35 @@ func (i ItemFull) Shrink() Item { } } -func (i Item) Serialize() ItemFull { +func (i Item) Serialize() ItemOut { lock.RLock() defer lock.RUnlock() - return ItemFull{ + return ItemOut{ {{expandItemFields}} } } -func (i ItemFull) Columns() []string { +func (i ItemIn) Columns() []string { + return []string{ + +{{inColumns}} + + } +} + +func (i ItemOut) Columns() []string { return []string{ -{{originalColumns}} +{{outColumns}} } } + func (i Item) Row() []string { lock.RLock() @@ -167,7 +177,7 @@ var RegisterReduce registerReduce // ValidateRegsiters validate exposed columns do match filter names func validateRegisters() { - var i = Item{} + var i = ItemOut{} var filters = []string{"match", "contains", "startswith"} for _, c := range i.Columns() { for _, f := range filters { diff --git a/http_handlers.go b/http_handlers.go index c618056..7425a47 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -368,29 +368,29 @@ type Meta struct { } type searchResponse struct { - Count int `json:"count"` - Data ItemsFull `json:"data"` - MMeta *Meta `json:"meta"` + Count int `json:"count"` + Data ItemsOut `json:"data"` + MMeta *Meta `json:"meta"` } func makeResp(items Items) searchResponse { - itemsfull := make(ItemsFull, 0, len(items)) + itemsout := 
make(ItemsOut, 0, len(items)) for _, oneitem := range items { orgItem := oneitem.Serialize() - itemsfull = append(itemsfull, &orgItem) + itemsout = append(itemsout, &orgItem) } fields := []ShowItem{} - columns := ItemFull{}.Columns() + columns := ItemOut{}.Columns() for _, column := range columns { fields = append(fields, ShowItem{IsShow: true, Name: column, Label: column}) } return searchResponse{ Count: len(items), - Data: itemsfull, + Data: itemsout, MMeta: &Meta{Fields: fields, View: "table"}, } } diff --git a/main.go b/main.go index 3e3b8c2..d8eff38 100644 --- a/main.go +++ b/main.go @@ -23,7 +23,8 @@ type registerFormatMap map[string]formatRespFunc //Items as Example type labeledItems map[int]*Item type Items []*Item -type ItemsFull []*ItemFull +type ItemsIn []*ItemIn +type ItemsOut []*ItemOut type ItemsGroupedBy map[string]Items type ItemsChannel chan Items From b5c3a2f5a8cc05dc37a29e03da37a39b8734cab6 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Thu, 8 Oct 2020 12:11:36 +0200 Subject: [PATCH 18/54] use gzipped csv --- Dockerfile | 1 + csv.go | 1 + 2 files changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index c757752..18b8934 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,6 +23,7 @@ COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ #COPY --from=builder /app/files/ITEMS.txt.gz /app/files/ITEMS.txt.gz WORKDIR /app + # Run the binary. 
ENV http_db_host "0.0.0.0:8000" diff --git a/csv.go b/csv.go index fe1bea2..74a02c0 100644 --- a/csv.go +++ b/csv.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "io" + //"io/ioutil" "log" "os" "strings" From 0cc33c6d75bf1fb2e422d53e1455b64c8bb2c90a Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 27 Oct 2020 16:21:17 +0100 Subject: [PATCH 19/54] handle review remarks --- csv.go | 1 - http_handlers.go | 1 + main.go | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/csv.go b/csv.go index 74a02c0..fe1bea2 100644 --- a/csv.go +++ b/csv.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "io" - //"io/ioutil" "log" "os" "strings" diff --git a/http_handlers.go b/http_handlers.go index 7425a47..a76a320 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -269,6 +269,7 @@ func handleInputStorage(r *http.Request) (string, storageFunc, retrieveFunc, str storagefunc = STORAGEFUNCS[storagename] } + // TODO do not use ReadAll..but do it line by line s, err := ioutil.ReadAll(fz) if err != nil { return diff --git a/main.go b/main.go index d8eff38..6e23067 100644 --- a/main.go +++ b/main.go @@ -67,7 +67,7 @@ func loadcsv(itemChan ItemsChannel) { SETTINGS.Get("null-delimiter")) if err != nil { - log.Fatalln(err) + log.Print(err) } // make sure channels are empty From 507f79b75ca7f23758a9d0177eb9a89eba31df60 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 19 Jan 2021 16:49:45 +0100 Subject: [PATCH 20/54] add groupby / reduce to query and validate parameters --- extras/templates/model.template.jinja2 | 4 +++ http_handlers.go | 17 ++++++------- main.go | 25 ++++++++++-------- operations.go | 35 ++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 19 deletions(-) diff --git a/extras/templates/model.template.jinja2 b/extras/templates/model.template.jinja2 index 5b86764..83abe3c 100644 --- a/extras/templates/model.template.jinja2 +++ b/extras/templates/model.template.jinja2 @@ -8,6 +8,10 @@ import ( "sync" ) +type registerGroupByFunc 
map[string]func(*Item) string +type registerGettersMap map[string]func(*Item) string +type registerReduce map[string]func(Items) map[string]string + type fieldIdxMap map[string]uint16 type fieldMapIdx map[uint16]string type fieldItemmap map[uint16][]*Item diff --git a/http_handlers.go b/http_handlers.go index a76a320..f163f4e 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -2,6 +2,8 @@ package main import ( "encoding/csv" + "encoding/json" + "errors" "fmt" "index/suffixarray" "log" @@ -62,9 +64,7 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group setHeader(items, w, query, queryTime) - groupByS, groupByFound := r.URL.Query()["groupby"] - - if !groupByFound { + if query.GroupBy == "" { if query.ReturnFormat == "csv" { writeCSV(items, w) } else { @@ -75,16 +75,15 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group return } - groupByItems := groupByRunner(items, groupByS[0]) + groupByItems := groupByRunner(items, query.GroupBy) items = nil - reduceName, reduceFound := r.URL.Query()["reduce"] - - if reduceFound { + if query.Reduce != "" { result := make(map[string]map[string]string) - reduceFunc, reduceFuncFound := operations.Reduce[reduceName[0]] + reduceFunc, reduceFuncFound := operations.Reduce[query.Reduce] if !reduceFuncFound { - json.NewEncoder(w).Encode(result) + err = errors.New("invalid reduce") + hanleQueryError(err, w) return } for key, val := range groupByItems { diff --git a/main.go b/main.go index 6e23067..5417bca 100644 --- a/main.go +++ b/main.go @@ -11,15 +11,6 @@ import ( "time" ) -type filterFuncc func(*Item, string) bool -type registerFuncType map[string]filterFuncc -type registerGroupByFunc map[string]func(*Item) string -type registerGettersMap map[string]func(*Item) string -type registerReduce map[string]func(Items) map[string]string -type filterType map[string][]string -type formatRespFunc func(w http.ResponseWriter, r *http.Request, items Items) -type registerFormatMap 
map[string]formatRespFunc - //Items as Example type labeledItems map[int]*Item type Items []*Item @@ -107,7 +98,12 @@ func main() { ITEMS = labeledItems{} - Operations = GroupedOperations{Funcs: RegisterFuncMap, GroupBy: RegisterGroupBy, Getters: RegisterGetters, Reduce: RegisterReduce} + Operations = GroupedOperations{ + Funcs: RegisterFuncMap, + GroupBy: RegisterGroupBy, + Getters: RegisterGetters, + Reduce: RegisterReduce, + } itemChan := make(ItemsChannel, 1000) go ItemChanWorker(itemChan) @@ -164,6 +160,15 @@ func main() { msg := fmt.Sprint("starting server\nhost: ", ipPort, " with:", len(ITEMS), "items ", "management api's: ", SETTINGS.Get("mgmt") == "y", " jwt enabled: ", JWTConfig.Enabled, " monitoring: ", SETTINGS.Get("prometheus-monitoring") == "yes", " CORS: ", cors) fmt.Printf(InfoColorN, msg) + msg := fmt.Sprint("starting server\nhost: ", ipPort, " with:", len(ITEMS), "items ", "jwt enabled: ", JWTConfig.Enabled) + fmt.Printf(InfoColorN, msg) + + /* + if SETTINGS.Get("debug") == "yes" { + go runPrintMem() + } + */ + middleware := MIDDLEWARE(cors) log.Fatal(http.ListenAndServe(ipPort, middleware(mux))) } diff --git a/operations.go b/operations.go index 64041b8..bf5bd68 100644 --- a/operations.go +++ b/operations.go @@ -40,6 +40,7 @@ func (ft filterType) CacheKey() string { return strings.Join(filterlist, "-") } +type filterType map[string][]string type formatRespFunc func(w http.ResponseWriter, r *http.Request, items Items) type registerFormatMap map[string]formatRespFunc @@ -52,6 +53,9 @@ type Query struct { GroupBy string Reduce string + GroupBy string + Reduce string + Limit int LimitGiven bool Page int @@ -94,6 +98,18 @@ func (q Query) CacheKey() (string, error) { if filterFound { return "", errors.New("bitarrays filters do not need to be cached") } + keys := []string{ + q.ReturnFormat, + } + + return strings.Join(keys, "-"), nil +} + +func decodeUrl(s string) string { + newS, err := url.QueryUnescape(s) + if err != nil { + fmt.Println("oh no 
error", err) + return s } keys := []string{ @@ -108,10 +124,13 @@ func (q Query) CacheKey() (string, error) { return strings.Join(keys, "-"), nil } +// parseURLParameters checks parameters and builds a query to be run. func parseURLParameters(r *http.Request) (Query, error) { filterMap := make(filterType) excludeMap := make(filterType) anyMap := make(filterType) + groupBy := "" + reduce := "" groupBy := "" reduce := "" @@ -169,6 +188,20 @@ func parseURLParameters(r *http.Request) (Query, error) { // Check and validate reduce parameter parameter, found = r.Form["reduce"] + + parameter, found := urlItems["groupby"] + if found && parameter[0] != "" { + _, funcFound := RegisterGroupBy[parameter[0]] + if !funcFound { + return Query{}, errors.New("Invalid groupby parameter") + } + groupBy = parameter[0] + + } + + // Check and validate reduce parameter + parameter, found = urlItems["reduce"] + if found && parameter[0] != "" { _, funcFound := RegisterReduce[parameter[0]] if !funcFound { @@ -232,6 +265,8 @@ func parseURLParameters(r *http.Request) (Query, error) { Filters: filterMap, Excludes: excludeMap, Anys: anyMap, + GroupBy: groupBy, + Reduce: reduce, GroupBy: groupBy, Reduce: reduce, From 7b832943520a459904cbd8623332e97bb522b3bd Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 19 Jan 2021 22:49:08 +0100 Subject: [PATCH 21/54] groupby cache experiment --- http_handlers.go | 50 ++++++++++++++++++++++++++++++++++++++++++++++-- main.go | 17 +++++++++------- operations.go | 27 ++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 9 deletions(-) diff --git a/http_handlers.go b/http_handlers.go index f163f4e..bfbea4e 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -12,6 +12,7 @@ import ( "sort" "strconv" "strings" + "sync" "time" ) @@ -45,6 +46,37 @@ func hanleQueryError(err error, w http.ResponseWriter) { json.NewEncoder(w).Encode(response) } +type GroupByResult map[string]map[string]string + +var GroupByBodyCache = make(map[string]GroupByResult) 
+var GroupByHeaderCache = make(map[string]HeaderData) + +var cacheLock = sync.RWMutex{} + +// isCached try to find repsonse in cache (groupby only) +func isCached(w http.ResponseWriter, r *http.Request, query Query) bool { + cacheKey, err := query.CacheKey() + + // fmt.Println(cacheKey) + + if err == nil && len(query.GroupBy) > 0 && len(query.Reduce) > 0 { + cacheLock.Lock() + groupByResult, found := GroupByBodyCache[cacheKey] + headerCache, _ := GroupByHeaderCache[cacheKey] + cacheLock.Unlock() + if found { + + w.Header().Set("Content-Type", "application/json") + for key, val := range headerCache { + w.Header().Set(key, val) + } + json.NewEncoder(w).Encode(groupByResult) + return found + } + } + return false +} + func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations GroupedOperations) func(http.ResponseWriter, *http.Request) { return func(w http.ResponseWriter, r *http.Request) { query, err := parseURLParameters(r) @@ -53,6 +85,10 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group return } + if isCached(w, r, query) { + return + } + items, queryTime := runQuery(&ITEMS, query, operations) msg := fmt.Sprint("total: ", len(ITEMS), " hits: ", len(items), " time: ", queryTime, "ms ", "url: ", r.URL) @@ -79,10 +115,10 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group items = nil if query.Reduce != "" { - result := make(map[string]map[string]string) + result := make(GroupByResult) reduceFunc, reduceFuncFound := operations.Reduce[query.Reduce] if !reduceFuncFound { - err = errors.New("invalid reduce") + err = errors.New("invalid reduce parameter value") hanleQueryError(err, w) return } @@ -95,7 +131,16 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group return } + // Cache group by repsonse + cacheLock.Lock() + cacheKey, _ := query.CacheKey() + GroupByBodyCache[cacheKey] = result + headerData := getHeaderData(items, query, queryTime) + 
GroupByHeaderCache[cacheKey] = headerData + cacheLock.Unlock() + json.NewEncoder(w).Encode(result) + return } @@ -449,6 +494,7 @@ func contextSearchRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Gro } w.Header().Set("Content-Type", "application/json") + for key, val := range headerData { w.Header().Set(key, val) } diff --git a/main.go b/main.go index 5417bca..d6b7d3f 100644 --- a/main.go +++ b/main.go @@ -6,7 +6,7 @@ import ( "net/http" // "runtime/debug" "github.com/pkg/profile") //"github.com/prometheus/client_golang/prometheus" //"github.com/prometheus/client_golang/prometheus/promauto" - "github.com/prometheus/client_golang/prometheus/promhttp" + // "github.com/prometheus/client_golang/prometheus/promhttp" "net/http" // "runtime/debug" "github.com/pkg/profile") "time" ) @@ -62,11 +62,17 @@ func loadcsv(itemChan ItemsChannel) { } // make sure channels are empty - time.Sleep(1 * time.Second) - S2CELLS.Sort() - fmt.Println("Sorted") // add timeout there is no garantee ItemsChannel // is empty and you miss a few records + time.Sleep(5 * time.Second) + S2CELLS.Sort() + fmt.Println("Sorted") + + cacheLock.Lock() + defer cacheLock.Unlock() + + GroupByBodyCache = make(map[string]GroupByResult) + GroupByHeaderCache = make(map[string]HeaderData) // makeIndex() } @@ -160,9 +166,6 @@ func main() { msg := fmt.Sprint("starting server\nhost: ", ipPort, " with:", len(ITEMS), "items ", "management api's: ", SETTINGS.Get("mgmt") == "y", " jwt enabled: ", JWTConfig.Enabled, " monitoring: ", SETTINGS.Get("prometheus-monitoring") == "yes", " CORS: ", cors) fmt.Printf(InfoColorN, msg) - msg := fmt.Sprint("starting server\nhost: ", ipPort, " with:", len(ITEMS), "items ", "jwt enabled: ", JWTConfig.Enabled) - fmt.Printf(InfoColorN, msg) - /* if SETTINGS.Get("debug") == "yes" { go runPrintMem() diff --git a/operations.go b/operations.go index bf5bd68..f1efc54 100644 --- a/operations.go +++ b/operations.go @@ -12,8 +12,10 @@ import ( "log" "net/url" "sort" + //"reflect" 
"errors" + "sort" "strconv" "strings" "time" @@ -41,6 +43,16 @@ func (ft filterType) CacheKey() string { } type filterType map[string][]string + +func (ft filterType) CacheKey() string { + filterlist := []string{} + for k, v := range ft { + filterlist = append(filterlist, fmt.Sprintf("%s=%s", k, v)) + } + sort.Strings(filterlist) + return strings.Join(filterlist, "-") +} + type formatRespFunc func(w http.ResponseWriter, r *http.Request, items Items) type registerFormatMap map[string]formatRespFunc @@ -98,11 +110,23 @@ func (q Query) CacheKey() (string, error) { if filterFound { return "", errors.New("bitarrays filters do not need to be cached") } + } + + if q.EarlyExit() { + return "", errors.New("not cached") + } + keys := []string{ + q.Filters.CacheKey(), + q.Excludes.CacheKey(), + q.Anys.CacheKey(), + q.GroupBy, + q.Reduce, q.ReturnFormat, } return strings.Join(keys, "-"), nil + } func decodeUrl(s string) string { @@ -147,6 +171,9 @@ func parseURLParameters(r *http.Request) (Query, error) { } // we can post gejson data + + urlItems := r.URL.Query() + // parse post geojson data r.ParseForm() if SETTINGS.Get("debug") == "yes" { From 8a592468ad7f4004f745d0478b730409e586d852 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 26 Jan 2021 01:13:00 +0100 Subject: [PATCH 22/54] start factoring bitarray into templateable code --- .gitignore | 1 + csv.go | 12 +++---- curlgeotest.sh | 22 ++++++++++++ curltest.sh | 18 ++-------- geo.go | 8 +++-- http_handlers.go | 74 ++++++++-------------------------------- main.go | 25 +++++--------- operations.go | 77 ++++++++++++++++++++++++++++++------------ parse_pg_array.go | 49 +++++++++++++++++++++++++++ parse_pg_array_test.go | 37 ++++++++++++++++++++ store.go | 41 ++++++++++++++++++++++ 11 files changed, 241 insertions(+), 123 deletions(-) create mode 100644 .gitignore create mode 100755 curlgeotest.sh create mode 100644 parse_pg_array.go create mode 100644 parse_pg_array_test.go create mode 100644 store.go diff --git 
a/.gitignore b/.gitignore new file mode 100644 index 0000000..80abf77 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +extras/model.go diff --git a/csv.go b/csv.go index fe1bea2..c9c1c98 100644 --- a/csv.go +++ b/csv.go @@ -5,14 +5,13 @@ import ( "encoding/json" "errors" "fmt" + csv "github.com/JensRantil/go-csv" + "github.com/cheggaaa/pb" "io" "log" "os" "strings" "unicode/utf8" - - csv "github.com/JensRantil/go-csv" - "github.com/cheggaaa/pb" ) func containsDelimiter(col string) bool { @@ -59,7 +58,7 @@ func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, success := 0 failed := 0 - items := Items{} + items := ItemsIn{} for { itemIn := ItemIn{} @@ -113,10 +112,9 @@ func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, if len(items) > 100000 { itemChan <- items - items = Items{} + items = ItemsIn{} } - smallItem := itemIn.Shrink() - items = append(items, &smallItem) + items = append(items, &itemIn) success++ } diff --git a/curlgeotest.sh b/curlgeotest.sh new file mode 100755 index 0000000..fb0d54f --- /dev/null +++ b/curlgeotest.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +set -x +set -e +set -u + + +curl -vvv \ + --data-urlencode 'geojson={ + "type": "Polygon", + "coordinates": [ + [ + [4.902321, 52.428306], + [4.90127, 52.427024], + [4.905281, 52.426069], + [4.906782, 52.426226], + [4.906418, 52.427469], + [4.902321, 52.428306] + ] + ] + }' \ + 'http://127.0.0.1:8000/list/?groupby=postcode&reduce=count' diff --git a/curltest.sh b/curltest.sh index fb0d54f..080ea17 100755 --- a/curltest.sh +++ b/curltest.sh @@ -4,19 +4,5 @@ set -x set -e set -u - -curl -vvv \ - --data-urlencode 'geojson={ - "type": "Polygon", - "coordinates": [ - [ - [4.902321, 52.428306], - [4.90127, 52.427024], - [4.905281, 52.426069], - [4.906782, 52.426226], - [4.906418, 52.427469], - [4.902321, 52.428306] - ] - ] - }' \ - 'http://127.0.0.1:8000/list/?groupby=postcode&reduce=count' +curl -vv 
'http://127.0.0.1:8000/list/?groupby=woning_type&reduce=count' +curl -vv 'http://127.0.0.1:8000/list/?match-wijkcode=WK036394&groupby=woning_type&reduce=count' diff --git a/geo.go b/geo.go index 8875663..9419db1 100644 --- a/geo.go +++ b/geo.go @@ -67,8 +67,12 @@ func (c cellIndexNode) IsEmpty() bool { return c.ID == 0 } -//GeoIndex for each items determine S2Cell and store it. +// GeoIndex for each items determine S2Cell and store it. func (i Item) GeoIndex(label int) error { + + lock.Lock() + defer lock.Unlock() + if i.GetGeometry() == "" { return fmt.Errorf("missing wkt geometry") } @@ -154,7 +158,7 @@ func SearchGeoItems(cu s2.CellUnion) labeledItems { for _, i := range S2CELLS[min : max+1] { if cu.ContainsCellID(i.ID) { - newItems[i.Label] = ITEMS[i.Label] + newItems = append(newItems, ITEMS[i.Label]) } } diff --git a/http_handlers.go b/http_handlers.go index bfbea4e..fa226b2 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -57,15 +57,12 @@ var cacheLock = sync.RWMutex{} func isCached(w http.ResponseWriter, r *http.Request, query Query) bool { cacheKey, err := query.CacheKey() - // fmt.Println(cacheKey) - if err == nil && len(query.GroupBy) > 0 && len(query.Reduce) > 0 { cacheLock.Lock() groupByResult, found := GroupByBodyCache[cacheKey] headerCache, _ := GroupByHeaderCache[cacheKey] cacheLock.Unlock() if found { - w.Header().Set("Content-Type", "application/json") for key, val := range headerCache { w.Header().Set(key, val) @@ -85,9 +82,10 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group return } - if isCached(w, r, query) { - return - } + //if isCached(w, r, query) { + // fmt.Println(InfoColor, "cache used") + // return + //} items, queryTime := runQuery(&ITEMS, query, operations) @@ -148,26 +146,10 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group } } -func ItemChanWorker(itemChan ItemsChannel) { - label := 0 - - for items := range itemChan { - lock.Lock() - for _, itm := range 
items { - if itm != nil { - ITEMS[label] = itm - itm.GeoIndex(label) - label++ - } - } - lock.Unlock() - } -} - func contextAddRest(JWTConig jwtConfig, itemChan ItemsChannel, operations GroupedOperations) func(http.ResponseWriter, *http.Request) { return func(w http.ResponseWriter, r *http.Request) { jsonDecoder := json.NewDecoder(r.Body) - var items Items + var items ItemsIn err := jsonDecoder.Decode(&items) if err != nil { fmt.Println(err) @@ -370,37 +352,6 @@ func validColumn(column string, columns []string) bool { return false } -// Other wise also known in mathematics as set but in http name it would be confused with the verb set. -//func UniqueValuesInColumn(w http.ResponseWriter, r *http.Request) { -// column := r.URL.Path[1:] -// response := make(map[string]string) -// if len(ITEMS) == 0 { -// response["message"] = fmt.Sprint("invalid input: ", column) -// w.WriteHeader(400) -// json.NewEncoder(w).Encode(response) -// return -// -// } -// validColumns := ITEMS[0].Columns() -// -// if !validColumn(column, validColumns) { -// w.WriteHeader(400) -// -// response["message"] = fmt.Sprint("invalid input: ", column) -// response["input"] = column -// response["valid input"] = strings.Join(validColumns, ", ") -// json.NewEncoder(w).Encode(response) -// return -// } -// set := make(map[string]bool) -// for item := range ITEMS { -// r := reflect.ValueOf(item) -// value := reflect.Indirect(r).FieldByName(column) -// valu -// set[value.Str()] = true -// } -// -//} type ShowItem struct { IsShow bool `json:"isShow"` Label string `json:"label"` @@ -475,16 +426,19 @@ func MIDDLEWARE(cors bool) func(http.Handler) http.Handler { func contextSearchRest(JWTConig jwtConfig, itemChan ItemsChannel, operations GroupedOperations) func(http.ResponseWriter, *http.Request) { return func(w http.ResponseWriter, r *http.Request) { query, err := parseURLParameters(r) + if err != nil { hanleQueryError(err, w) return } items, queryTime := runQuery(&ITEMS, query, operations) + if len(items) 
== 0 { w.WriteHeader(404) return } + msg := fmt.Sprint("total: ", len(ITEMS), " hits: ", len(items), " time: ", queryTime, "ms ", "url: ", r.URL) fmt.Printf(NoticeColorN, msg) headerData := getHeaderData(items, query, queryTime) @@ -519,11 +473,13 @@ func contextTypeAheadRest(JWTConig jwtConfig, itemChan ItemsChannel, operations if column[len(column)-1] == '/' { column = column[:len(column)-1] } - if _, ok := operations.Getters[column]; !ok { - w.WriteHeader(404) - w.Write([]byte("column is not found")) - return - } + /* + if _, ok := operations.Getters[column]; !ok { + w.WriteHeader(404) + w.Write([]byte("wrong column name")) + return + } + */ results, queryTime := runTypeAheadQuery(&ITEMS, column, query, operations) if len(results) == 0 { diff --git a/main.go b/main.go index d6b7d3f..1b4ccae 100644 --- a/main.go +++ b/main.go @@ -11,18 +11,6 @@ import ( "time" ) -//Items as Example -type labeledItems map[int]*Item -type Items []*Item -type ItemsIn []*ItemIn -type ItemsOut []*ItemOut - -type ItemsGroupedBy map[string]Items -type ItemsChannel chan Items - -var ITEMS labeledItems -var itemChan ItemsChannel - type jwtConfig struct { Enabled bool SharedSecret string @@ -100,15 +88,18 @@ func main() { SETTINGS.Set("readonly", "yes", "only allow read only funcions") SETTINGS.Set("debug", "no", "print memory usage") + SETTINGS.Set("groupbycache", "yes", "use in memory cache") + SETTINGS.Parse() - ITEMS = labeledItems{} + // ITEMS = labeledItems{} Operations = GroupedOperations{ - Funcs: RegisterFuncMap, - GroupBy: RegisterGroupBy, - Getters: RegisterGetters, - Reduce: RegisterReduce, + Funcs: RegisterFuncMap, + GroupBy: RegisterGroupBy, + Getters: RegisterGetters, + Reduce: RegisterReduce, + BitArrays: RegisterBitArray, } itemChan := make(ItemsChannel, 1000) diff --git a/operations.go b/operations.go index f1efc54..69c9e6a 100644 --- a/operations.go +++ b/operations.go @@ -3,18 +3,11 @@ package main import ( "encoding/json" "fmt" - "github.com/go-spatial/geom" - 
"github.com/go-spatial/geom/encoding/geojson" "net/http" - - // "reflect" - "errors" - "log" "net/url" "sort" - //"reflect" - "errors" + "log" "sort" "strconv" "strings" @@ -139,6 +132,7 @@ func decodeUrl(s string) string { keys := []string{ q.Filters.CacheKey(), q.Excludes.CacheKey(), + q.BitArrays.CacheKey(), q.Anys.CacheKey(), q.GroupBy, q.Reduce, @@ -156,9 +150,6 @@ func parseURLParameters(r *http.Request) (Query, error) { groupBy := "" reduce := "" - groupBy := "" - reduce := "" - //TODO change query to be based on input. // parse params and body posts // (geo)json data @@ -173,18 +164,13 @@ func parseURLParameters(r *http.Request) (Query, error) { // we can post gejson data urlItems := r.URL.Query() - // parse post geojson data + // parse params and body posts // (geo)json data r.ParseForm() if SETTINGS.Get("debug") == "yes" { - for key, value := range r.Form { fmt.Printf("F %s = %s\n", key, value) } - for key, value := range urlItems { - - fmt.Printf("P %s = %s\n", key, value) - } } for k := range RegisterFuncMap { @@ -204,6 +190,7 @@ func parseURLParameters(r *http.Request) (Query, error) { // Check and validate groupby parameter parameter, found := r.Form["groupby"] + if found && parameter[0] != "" { _, funcFound1 := RegisterGroupBy[parameter[0]] _, funcFound2 := RegisterGroupByCustom[parameter[0]] @@ -217,6 +204,7 @@ func parseURLParameters(r *http.Request) (Query, error) { parameter, found = r.Form["reduce"] parameter, found := urlItems["groupby"] + if found && parameter[0] != "" { _, funcFound := RegisterGroupBy[parameter[0]] if !funcFound { @@ -227,7 +215,7 @@ func parseURLParameters(r *http.Request) (Query, error) { } // Check and validate reduce parameter - parameter, found = urlItems["reduce"] + parameter, found = r.Form["reduce"] if found && parameter[0] != "" { _, funcFound := RegisterReduce[parameter[0]] @@ -292,8 +280,9 @@ func parseURLParameters(r *http.Request) (Query, error) { Filters: filterMap, Excludes: excludeMap, Anys: anyMap, - GroupBy: 
groupBy, - Reduce: reduce, + + GroupBy: groupBy, + Reduce: reduce, GroupBy: groupBy, Reduce: reduce, @@ -466,7 +455,13 @@ func filteredEarlyExitSingle(items *labeledItems, column string, operations Grou lock.RLock() defer lock.RUnlock() +<<<<<<< HEAD for _, item := range items { +======= + // TODO candidate for speedup + + for _, item := range *items { +>>>>>>> 164af00... start factoring bitarray into templateable code if !any(item, anys, registerFuncs) { continue } @@ -518,12 +513,17 @@ func bitArrayFilter( combinedBitArrays := make([]bitarray.BitArray, 0) +<<<<<<< HEAD for k := range operations.BitArrays { parameter, foundkey := query.Filters["match-"+k] if len(parameter) == 0 { continue } +======= + for k, _ := range operations.BitArrays { + parameter, foundkey := query.Filters[k] +>>>>>>> 164af00... start factoring bitarray into templateable code if !foundkey { continue } @@ -539,8 +539,13 @@ func bitArrayFilter( if len(combinedBitArrays) > 0 { bitArrayResult = combinedBitArrays[0] +<<<<<<< HEAD } else { log.Println("no bitarrays found") +======= + fmt.Println(bitArrayResult) + } else { +>>>>>>> 164af00... start factoring bitarray into templateable code return nil, errors.New("no bitarray found") } @@ -551,9 +556,16 @@ func bitArrayFilter( } } +<<<<<<< HEAD // TODO OR // TODO EXCLUDE +======= + fmt.Println(len(combinedBitArrays)) + // TODO OR + // TODO EXCLUDE + fmt.Println(bitArrayResult) +>>>>>>> 164af00... start factoring bitarray into templateable code if bitArrayResult == nil { log.Fatal("something went wrong with bitarray..") } @@ -561,8 +573,26 @@ func bitArrayFilter( newItems := make(labeledItems, 0) labels := bitArrayResult.ToNums() +<<<<<<< HEAD for _, l := range labels { newItems = append(newItems, (*items)[int(l)]) +======= + /* + b1 := (*items)[int(labels[0])].Serialize().Buurtcode + b2 := (*items)[int(labels[len(labels)-1])].Serialize().Buurtcode + + // sanity check. 
+ if !(b1 == b2 && b2 == p[0]) { + msg := fmt.Sprintf( + "bitarray indexing error values mismatch! !(%s == %s == %s)", + b1, b2, p[0]) + log.Fatal(msg) + } + */ + + for _, l := range labels { + newItems[int(l)] = (*items)[int(l)] +>>>>>>> 164af00... start factoring bitarray into templateable code } return newItems, nil @@ -592,6 +622,7 @@ func runQuery(items *labeledItems, query Query, operations GroupedOperations) (I nextItems = &filteredItems } +<<<<<<< HEAD if query.IndexGiven && len(STR_INDEX) > 0 { items = make(Items, 0) indices := INDEX.Lookup([]byte(query.IndexQuery), -1) @@ -622,10 +653,12 @@ func runQuery(items *labeledItems, query Query, operations GroupedOperations) (I } } +======= +>>>>>>> 164af00... start factoring bitarray into templateable code if query.EarlyExit() { - newItems = filteredEarlyExit(items, operations, query) + newItems = filteredEarlyExit(nextItems, operations, query) } else { - newItems = filtered(items, operations, query) + newItems = filtered(nextItems, operations, query) } diff := time.Since(start) diff --git a/parse_pg_array.go b/parse_pg_array.go new file mode 100644 index 0000000..42f6095 --- /dev/null +++ b/parse_pg_array.go @@ -0,0 +1,49 @@ +package main + +import ( + "bytes" + "errors" +) + +func ParsePGArray(array string) ([]string, error) { + var out []string + var arrayOpened, quoteOpened, escapeOpened bool + item := &bytes.Buffer{} + for _, r := range array { + switch { + case !arrayOpened: + if r != '{' { + return nil, errors.New("Doesn't appear to be a postgres array. 
Doesn't start with an opening curly brace.") + } + arrayOpened = true + case escapeOpened: + item.WriteRune(r) + escapeOpened = false + case quoteOpened: + switch r { + case '\\': + escapeOpened = true + case '"': + quoteOpened = false + if item.String() == "NULL" { + item.Reset() + } + default: + item.WriteRune(r) + } + case r == '}': + // done + out = append(out, item.String()) + return out, nil + case r == '"': + quoteOpened = true + case r == ',': + // end of item + out = append(out, item.String()) + item.Reset() + default: + item.WriteRune(r) + } + } + return nil, errors.New("Doesn't appear to be a postgres array. Premature end of string.") +} diff --git a/parse_pg_array_test.go b/parse_pg_array_test.go new file mode 100644 index 0000000..d0a8ca0 --- /dev/null +++ b/parse_pg_array_test.go @@ -0,0 +1,37 @@ +package main + +import ( + "reflect" + "testing" +) + +func TestParseArray(t *testing.T) { + scanTests := []struct { + in string + out []string + }{ + {"{one,two}", []string{"one", "two"}}, + {`{"one, sdf",two}`, []string{"one, sdf", "two"}}, + {`{"\"one\"",two}`, []string{`"one"`, "two"}}, + {`{"\\one\\",two}`, []string{`\one\`, "two"}}, + {`{"{one}",two}`, []string{`{one}`, "two"}}, + {`{"one two"}`, []string{`one two`}}, + {`{"one,two"}`, []string{`one,two`}}, + {`{abcdef:83bf98cc-fec9-4e77-b4cf-99f9fb6655fa-0NH:zxcvzxc:wers:vxdfw-asdf-asdf}`, []string{"abcdef:83bf98cc-fec9-4e77-b4cf-99f9fb6655fa-0NH:zxcvzxc:wers:vxdfw-asdf-asdf"}}, + {`{"",two}`, []string{"", "two"}}, + {`{" ","NULL"}`, []string{" ", ""}}, + } + + for tcNumber, testcase := range scanTests { + result, err := ParsePGArray(testcase.in) + if err != nil { + t.Error("testcase", tcNumber, "gave error") + } + if len(result) == 0 { + t.Error("testcase", tcNumber, "expected", "found", "!=", "not found") + } + if !reflect.DeepEqual(result, testcase.out) { + t.Error("testcase", tcNumber, "expected", testcase.out, "!=", result) + } + } +} diff --git a/store.go b/store.go new file mode 100644 index 
0000000..6d35460 --- /dev/null +++ b/store.go @@ -0,0 +1,41 @@ +package main + +import ( + "log" +) + +//Items +type labeledItems []*Item +type Items []*Item +type ItemsIn []*ItemIn +type ItemsOut []*ItemOut + +type ItemsGroupedBy map[string]Items +type ItemsChannel chan ItemsIn + +var ITEMS labeledItems +var itemChan ItemsChannel + +func init() { + ITEMS = labeledItems{} +} + +func ItemChanWorker(itemChan ItemsChannel) { + label := 0 + + for items := range itemChan { + for _, itm := range items { + if itm != nil { + smallItem := itm.Shrink(label) + smallItem.StoreBitArrayColumns() + ITEMS = append(ITEMS, &smallItem) + //ITEMS[label] = &smallItem + if ITEMS[label] != &smallItem { + log.Fatal("storing item index off") + } + smallItem.GeoIndex(label) + label++ + } + } + } +} From 6b7be319a0382457f119caf831f396441b97e274 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 26 Jan 2021 01:51:34 +0100 Subject: [PATCH 23/54] fix bugs using multiple bitarray keys --- .dockerignore | 3 +++ main.go | 13 +++--------- operations.go | 56 ++------------------------------------------------- store.go | 2 +- 4 files changed, 9 insertions(+), 65 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..8254c06 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +*.csv +*.csv2 +.git diff --git a/main.go b/main.go index 1b4ccae..d694fd4 100644 --- a/main.go +++ b/main.go @@ -2,12 +2,11 @@ package main import ( "fmt" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/client_golang/prometheus/promhttp" "log" "net/http" // "runtime/debug" "github.com/pkg/profile") - //"github.com/prometheus/client_golang/prometheus" - //"github.com/prometheus/client_golang/prometheus/promauto" - // "github.com/prometheus/client_golang/prometheus/promhttp" - "net/http" // "runtime/debug" "github.com/pkg/profile") "time" ) @@ -157,12 +156,6 @@ func 
main() { msg := fmt.Sprint("starting server\nhost: ", ipPort, " with:", len(ITEMS), "items ", "management api's: ", SETTINGS.Get("mgmt") == "y", " jwt enabled: ", JWTConfig.Enabled, " monitoring: ", SETTINGS.Get("prometheus-monitoring") == "yes", " CORS: ", cors) fmt.Printf(InfoColorN, msg) - /* - if SETTINGS.Get("debug") == "yes" { - go runPrintMem() - } - */ - middleware := MIDDLEWARE(cors) log.Fatal(http.ListenAndServe(ipPort, middleware(mux))) } diff --git a/operations.go b/operations.go index 69c9e6a..2f10989 100644 --- a/operations.go +++ b/operations.go @@ -8,7 +8,7 @@ import ( "sort" "log" - "sort" + //"sort" "strconv" "strings" "time" @@ -35,8 +35,6 @@ func (ft filterType) CacheKey() string { return strings.Join(filterlist, "-") } -type filterType map[string][]string - func (ft filterType) CacheKey() string { filterlist := []string{} for k, v := range ft { @@ -58,9 +56,6 @@ type Query struct { GroupBy string Reduce string - GroupBy string - Reduce string - Limit int LimitGiven bool Page int @@ -125,14 +120,12 @@ func (q Query) CacheKey() (string, error) { func decodeUrl(s string) string { newS, err := url.QueryUnescape(s) if err != nil { - fmt.Println("oh no error", err) - return s + return "", errors.New("bitarrays not cached") } keys := []string{ q.Filters.CacheKey(), q.Excludes.CacheKey(), - q.BitArrays.CacheKey(), q.Anys.CacheKey(), q.GroupBy, q.Reduce, @@ -455,13 +448,7 @@ func filteredEarlyExitSingle(items *labeledItems, column string, operations Grou lock.RLock() defer lock.RUnlock() -<<<<<<< HEAD for _, item := range items { -======= - // TODO candidate for speedup - - for _, item := range *items { ->>>>>>> 164af00... 
start factoring bitarray into templateable code if !any(item, anys, registerFuncs) { continue } @@ -513,17 +500,12 @@ func bitArrayFilter( combinedBitArrays := make([]bitarray.BitArray, 0) -<<<<<<< HEAD for k := range operations.BitArrays { parameter, foundkey := query.Filters["match-"+k] if len(parameter) == 0 { continue } -======= - for k, _ := range operations.BitArrays { - parameter, foundkey := query.Filters[k] ->>>>>>> 164af00... start factoring bitarray into templateable code if !foundkey { continue } @@ -539,13 +521,7 @@ func bitArrayFilter( if len(combinedBitArrays) > 0 { bitArrayResult = combinedBitArrays[0] -<<<<<<< HEAD } else { - log.Println("no bitarrays found") -======= - fmt.Println(bitArrayResult) - } else { ->>>>>>> 164af00... start factoring bitarray into templateable code return nil, errors.New("no bitarray found") } @@ -556,16 +532,9 @@ func bitArrayFilter( } } -<<<<<<< HEAD // TODO OR // TODO EXCLUDE -======= - fmt.Println(len(combinedBitArrays)) - // TODO OR - // TODO EXCLUDE - fmt.Println(bitArrayResult) ->>>>>>> 164af00... start factoring bitarray into templateable code if bitArrayResult == nil { log.Fatal("something went wrong with bitarray..") } @@ -573,26 +542,8 @@ func bitArrayFilter( newItems := make(labeledItems, 0) labels := bitArrayResult.ToNums() -<<<<<<< HEAD for _, l := range labels { newItems = append(newItems, (*items)[int(l)]) -======= - /* - b1 := (*items)[int(labels[0])].Serialize().Buurtcode - b2 := (*items)[int(labels[len(labels)-1])].Serialize().Buurtcode - - // sanity check. - if !(b1 == b2 && b2 == p[0]) { - msg := fmt.Sprintf( - "bitarray indexing error values mismatch! !(%s == %s == %s)", - b1, b2, p[0]) - log.Fatal(msg) - } - */ - - for _, l := range labels { - newItems[int(l)] = (*items)[int(l)] ->>>>>>> 164af00... 
start factoring bitarray into templateable code } return newItems, nil @@ -622,7 +573,6 @@ func runQuery(items *labeledItems, query Query, operations GroupedOperations) (I nextItems = &filteredItems } -<<<<<<< HEAD if query.IndexGiven && len(STR_INDEX) > 0 { items = make(Items, 0) indices := INDEX.Lookup([]byte(query.IndexQuery), -1) @@ -653,8 +603,6 @@ func runQuery(items *labeledItems, query Query, operations GroupedOperations) (I } } -======= ->>>>>>> 164af00... start factoring bitarray into templateable code if query.EarlyExit() { newItems = filteredEarlyExit(nextItems, operations, query) } else { diff --git a/store.go b/store.go index 6d35460..750327c 100644 --- a/store.go +++ b/store.go @@ -29,7 +29,7 @@ func ItemChanWorker(itemChan ItemsChannel) { smallItem := itm.Shrink(label) smallItem.StoreBitArrayColumns() ITEMS = append(ITEMS, &smallItem) - //ITEMS[label] = &smallItem + // ITEMS[label] = &smallItem if ITEMS[label] != &smallItem { log.Fatal("storing item index off") } From 0d224b921d788b0d7857d42b4b04b4987e5cccd8 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 26 Jan 2021 14:37:39 +0100 Subject: [PATCH 24/54] fix cache headers --- curltest.sh | 3 +++ http_handlers.go | 9 +++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/curltest.sh b/curltest.sh index 080ea17..29291ae 100755 --- a/curltest.sh +++ b/curltest.sh @@ -4,5 +4,8 @@ set -x set -e set -u +# should be cached. 
curl -vv 'http://127.0.0.1:8000/list/?groupby=woning_type&reduce=count' + +# should not be cached.(using bitmaps) curl -vv 'http://127.0.0.1:8000/list/?match-wijkcode=WK036394&groupby=woning_type&reduce=count' diff --git a/http_handlers.go b/http_handlers.go index fa226b2..6d1cd55 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -64,9 +64,11 @@ func isCached(w http.ResponseWriter, r *http.Request, query Query) bool { cacheLock.Unlock() if found { w.Header().Set("Content-Type", "application/json") + for key, val := range headerCache { w.Header().Set(key, val) } + w.Header().Set("used-cache", "yes") json.NewEncoder(w).Encode(groupByResult) return found } @@ -82,10 +84,9 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group return } - //if isCached(w, r, query) { - // fmt.Println(InfoColor, "cache used") - // return - //} + if isCached(w, r, query) { + return + } items, queryTime := runQuery(&ITEMS, query, operations) From b4be4ae8f2bb22a5ca3950f2c4db12d04f7b8823 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 26 Jan 2021 16:13:42 +0100 Subject: [PATCH 25/54] update model template --- extras/templates/model.template.jinja2 | 49 ++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/extras/templates/model.template.jinja2 b/extras/templates/model.template.jinja2 index 83abe3c..c7fce8d 100644 --- a/extras/templates/model.template.jinja2 +++ b/extras/templates/model.template.jinja2 @@ -6,15 +6,19 @@ import ( "strconv" "strings" "sync" + + "github.com/Workiva/go-datastructures/bitarray" ) type registerGroupByFunc map[string]func(*Item) string type registerGettersMap map[string]func(*Item) string type registerReduce map[string]func(Items) map[string]string +type registerBitArray map[string]func(s string) (bitarray.BitArray, error) + type fieldIdxMap map[string]uint16 type fieldMapIdx map[uint16]string -type fieldItemmap map[uint16][]*Item +type fieldItemsMap map[uint16]bitarray.BitArray // 
Column maps. // Store for each non distinct/repeated column @@ -31,8 +35,12 @@ var {columnname} fieldMapIdx var {columnname}Items fieldItemmap */ +// item map lock var lock = sync.RWMutex{} +// bitArray Lock +var balock = sync.RWMutex{} + func init() { {{initRepeatColumns}} @@ -62,13 +70,14 @@ type ItemOut struct { type Item struct { + Label int // internal index in ITEMS {{columnsItem}} } // Shrink create smaller Item using uint16 -func (i ItemIn) Shrink() Item { +func (i ItemIn) Shrink(label int) Item { lock.Lock() defer lock.Unlock() @@ -77,11 +86,41 @@ func (i ItemIn) Shrink() Item { return Item{ + label, + {{shrinkItemFields}} } } +// Store selected columns in byte array +func (i Item) StoreBitArrayColumns() { + + balock.Lock() + defer balock.Unlock() + + lock.RLock() + defer lock.RUnlock() + + /* + + var ba = birarray.BitArray + var ok = error + + // Column Buurtcode has byte arrays for + ba, ok = BuurtcodeItems[i.Buurtcode] + if !ok { + ba = bitarray.NewSparseBitArray() + BuurtcodeItems[i.Buurtcode] = ba + } + + ba.SetBit(uint64(i.Label)) + + TODO ADD BIT-ARRAY COLUMNS. + */ + +} + func (i Item) Serialize() ItemOut { lock.RLock() @@ -170,6 +209,7 @@ type GroupedOperations struct { GroupBy registerGroupByFunc Getters registerGettersMap Reduce registerReduce + BitArrays registerBitArray } var Operations GroupedOperations @@ -178,6 +218,7 @@ var RegisterFuncMap registerFuncType var RegisterGroupBy registerGroupByFunc var RegisterGetters registerGettersMap var RegisterReduce registerReduce +var RegisterBitArray registerBitArray // ValidateRegsiters validate exposed columns do match filter names func validateRegisters() { @@ -201,6 +242,10 @@ func init() { // register search filter. 
//RegisterFuncMap["search"] = 'EDITYOURSELF' + // example RegisterFuncMap["search"] = FilterEkeyStartsWith + + //RegisterFuncMap["value"] = 'EDITYOURSELF' + // example RegisterGetters["value"] = GettersEkey // register filters From 1fbfaf56d3d4a79b748b9e4851d832f221489500 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Wed, 27 Jan 2021 13:33:47 +0100 Subject: [PATCH 26/54] start with bitarray templateing --- extras/create_model_v2.py | 38 +++++++++++++++++++++----- extras/templates/model.template.jinja2 | 15 +++++++++- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/extras/create_model_v2.py b/extras/create_model_v2.py index 6cfe6ed..b9a03bb 100644 --- a/extras/create_model_v2.py +++ b/extras/create_model_v2.py @@ -2,11 +2,20 @@ """ Load first rows from csv, ask some questions and generate a models.go to jumpstart -your project for the given csv file. +your lambda_db project for the given csv file + +models.go contains all the field information +and functions of rows in your data. + +- Repeated option to store repeated + values in a map and each individual items + only stores uint16 reference to map key. + +- BitArray option which is like Repeated + value but also creates a map[key]bitmap for all + items containing field value. Makes it possible + to do fast 'match' lookups. -Much morge memory efficient then v1 because repeated -values are now stored in a map and each individual item -only stores uint16 reference. python create_model.py your.csv """ @@ -35,6 +44,9 @@ allcolumns_org = [] repeated = [] repeated_org = [] +bitarray = [] +bitarray_org = [] +unique = [] unique = [] unique_org = [] ignored = [] @@ -59,11 +71,13 @@ def gocamelCase(string): # go camelcase column names kc = gocamelCase(k) - options = ['r', 'u', 'i', 'g'] + options = ['r', 'u', 'i', 'g', 'b'] while True: # keep asking for valid input - q1 = ("a (R)epeated value? has less then (2^16=65536) option. ", - "(U)nique, (G)eo lat/lon point OR (I)gnore ? 
r/u/g/i?") + q1 = ("(R)epeated value? has less then (2^16=65536) option.", + "(B)itarray, repeated column optimized for fast match.", + "(U)nique, (G)eo lat/lon point or (I)gnore ? r/b/u/g/i?." + ) action = input(f"idx:{index} is {k} {q1}") # noqa if action == '': print(f"pick one from {options}") @@ -86,6 +100,12 @@ def gocamelCase(string): geocolumns_org.append(k) unique.append(kc) unique_org.append(k) + elif action == 'b': + # same as repeated but with some extra bitarray stuff + repeated.append(kc) + repeated_org.append(k) + bitarray.append(k) + bitarray.append(k) else: print('invalid input') sys.exit(-1) @@ -162,6 +182,10 @@ def gocamelCase(string): shrinkItems.append(shrinktemplate.render(column=c)) +bitArrayGetters = [] +bitArrayStore = [] + + # create the actual shrinked/expand Item fields. shrinkItemFields = [] expandItemFields = [] diff --git a/extras/templates/model.template.jinja2 b/extras/templates/model.template.jinja2 index c7fce8d..d6b41d0 100644 --- a/extras/templates/model.template.jinja2 +++ b/extras/templates/model.template.jinja2 @@ -1,3 +1,15 @@ +/* + model.go define the 'items' to store. + All columns with getters and setters are defined here. + + ItemIn, represent rows from the Input data + Item, the compact item stored in memmory + ItemOut, defines how and which fields are exported out + of the API. It is possible to ignore input columns + + Optionally bitarrays are created. 
+*/ + package main import ( @@ -93,7 +105,8 @@ func (i ItemIn) Shrink(label int) Item { } } -// Store selected columns in byte array +// Store selected columns in seperate map[columnvalue]bitarray +// for gast item lookup func (i Item) StoreBitArrayColumns() { balock.Lock() From 197e3dc17e834506ef125f1ed7ecaf257dd4b771 Mon Sep 17 00:00:00 2001 From: Stephan Preeker Date: Wed, 3 Feb 2021 02:14:54 +0100 Subject: [PATCH 27/54] bit array model code generation is working now --- extras/create_model_v2.py | 37 +++++++++++------- .../templates/bitarrayGetter.template.jinja2 | 19 +++++++++ extras/templates/initColumn.template.jinja2 | 10 +++-- extras/templates/model.template.jinja2 | 39 ++++++++++++++----- extras/templates/registerFilters.jinja2 | 15 ++++--- extras/templates/shrinkVars.jinja2 | 3 ++ .../templates/storebitarray.template.jinja2 | 9 +++++ operations.go | 5 +++ 8 files changed, 105 insertions(+), 32 deletions(-) create mode 100644 extras/templates/bitarrayGetter.template.jinja2 create mode 100644 extras/templates/storebitarray.template.jinja2 diff --git a/extras/create_model_v2.py b/extras/create_model_v2.py index b9a03bb..31a2177 100644 --- a/extras/create_model_v2.py +++ b/extras/create_model_v2.py @@ -74,9 +74,10 @@ def gocamelCase(string): options = ['r', 'u', 'i', 'g', 'b'] while True: # keep asking for valid input - q1 = ("(R)epeated value? has less then (2^16=65536) option.", - "(B)itarray, repeated column optimized for fast match.", - "(U)nique, (G)eo lat/lon point or (I)gnore ? r/b/u/g/i?." + q1 = ( + "(R)epeated value? has less then (2^16=65536) option.", + "(B)itarray, repeated column optimized for fast match.", + "(U)nique, (G)eo lat/lon point or (I)gnore ? r/b/u/g/i?." 
) action = input(f"idx:{index} is {k} {q1}") # noqa if action == '': @@ -104,8 +105,8 @@ def gocamelCase(string): # same as repeated but with some extra bitarray stuff repeated.append(kc) repeated_org.append(k) - bitarray.append(k) - bitarray.append(k) + bitarray.append(kc) + bitarray_org.append(k) else: print('invalid input') sys.exit(-1) @@ -138,7 +139,19 @@ def gocamelCase(string): initColumntemplate = env.get_template('initColumn.template.jinja2') for c in repeated: - initRepeatColumns.append(initColumntemplate.render(columnname=c)) + initRepeatColumns.append( + initColumntemplate.render( + columnName=c, bitarraymap=c in bitarray) + ) + +# create bitarrays with item labels for column values. +bitArrayStores = [] +bitArrayGetters = [] +bitArrayStoreTemplate = env.get_template('storebitarray.template.jinja2') +bitArrayGetTemplate = env.get_template('bitarrayGetter.template.jinja2') +for r in bitarray: + bitArrayStores.append(bitArrayStoreTemplate.render(columnName=r)) + bitArrayGetters.append(bitArrayGetTemplate.render(columnName=r)) # create ItemFull struct fields columnsItemIn = [] @@ -178,14 +191,11 @@ def gocamelCase(string): shrinkvartemplate = env.get_template('shrinkVars.jinja2') shrinktemplate = env.get_template('shrinkColumn.jinja2') for c in repeated: - shrinkVars.append(shrinkvartemplate.render(column=c)) + shrinkVars.append( + shrinkvartemplate.render(column=c, bitarray=c in bitarray)) shrinkItems.append(shrinktemplate.render(column=c)) -bitArrayGetters = [] -bitArrayStore = [] - - # create the actual shrinked/expand Item fields. 
shrinkItemFields = [] expandItemFields = [] @@ -241,10 +251,9 @@ def gocamelCase(string): for c, co in zip(allcolumns, allcolumns_org): if c in ignored: continue - txt = rtempl.render(co=co, column=c) + txt = rtempl.render(co=co, columnName=c, bitarray=c in bitarray) registerFilters.append(txt) - sortColumns = [] sortTemplate = env.get_template('sortfunc.jinja2') @@ -294,6 +303,8 @@ def gocamelCase(string): sortColumns=''.join(sortColumns), indexcolumn=allcolumns[index], geometryGetter=geometryGetter, + bitArrayStores=''.join(bitArrayStores), + bitArrayGetters=''.join(bitArrayGetters), ) f = open('model.go', 'w') diff --git a/extras/templates/bitarrayGetter.template.jinja2 b/extras/templates/bitarrayGetter.template.jinja2 new file mode 100644 index 0000000..f04b600 --- /dev/null +++ b/extras/templates/bitarrayGetter.template.jinja2 @@ -0,0 +1,19 @@ +// GetBitArray{{columnName}} for given v string see if there is +// a bitarray created. +func GetBitArray{{columnName}}(v string) (bitarray.BitArray, error) { + + bpi, ok := {{columnName}}IdxMap[v] + + if !ok { + return nil, errors.New("no bitarray filter found for column value {{columnName}}") + } + + ba, ok := {{columnName}}Items[bpi] + + if !ok { + return nil, errors.New("no bitarray filter found for column idx value {{columnName}}") + } + + return ba, nil +} + diff --git a/extras/templates/initColumn.template.jinja2 b/extras/templates/initColumn.template.jinja2 index f9a4052..66a4e25 100644 --- a/extras/templates/initColumn.template.jinja2 +++ b/extras/templates/initColumn.template.jinja2 @@ -1,5 +1,7 @@ - {{columnname}}Tracker = 0 - {{columnname}}IdxMap = make(fieldIdxMap) - {{columnname}} = make(fieldMapIdx) - + {{columnName}}Tracker = 0 + {{columnName}}IdxMap = make(fieldIdxMap) + {{columnName}} = make(fieldMapIdx) +{% if bitarraymap %} + {{columnName}}Items = make(fieldItemsMap) +{% endif %} diff --git a/extras/templates/model.template.jinja2 b/extras/templates/model.template.jinja2 index d6b41d0..63370e6 100644 
--- a/extras/templates/model.template.jinja2 +++ b/extras/templates/model.template.jinja2 @@ -7,7 +7,20 @@ ItemOut, defines how and which fields are exported out of the API. It is possible to ignore input columns - Optionally bitarrays are created. + Repeated values are stored in maps with int numbers + as keys. Optionally bitarrays are created for reapeated + column values to do fast bit-wise filtering. + + A S2 geo index in created for lat, lon values. + + Unique values are stored as-is. + + The generated codes leaves room to create custom + index functions yourself to create an API with an + < 1 ms response time for your specific needs. + + This codebase solves: I need to have an API on this + tabular dataset fast! */ package main @@ -18,6 +31,7 @@ import ( "strconv" "strings" "sync" + "errors" "github.com/Workiva/go-datastructures/bitarray" ) @@ -53,6 +67,7 @@ var lock = sync.RWMutex{} // bitArray Lock var balock = sync.RWMutex{} + func init() { {{initRepeatColumns}} @@ -115,21 +130,22 @@ func (i Item) StoreBitArrayColumns() { lock.RLock() defer lock.RUnlock() - /* + {% if bitArrayStores is defined %} + var ba bitarray.BitArray + var ok bool - var ba = birarray.BitArray - var ok = error + {{ bitArrayStores }} + {% endif %} + + /* // Column Buurtcode has byte arrays for ba, ok = BuurtcodeItems[i.Buurtcode] if !ok { ba = bitarray.NewSparseBitArray() BuurtcodeItems[i.Buurtcode] = ba } - ba.SetBit(uint64(i.Label)) - - TODO ADD BIT-ARRAY COLUMNS. 
*/ } @@ -234,24 +250,29 @@ var RegisterReduce registerReduce var RegisterBitArray registerBitArray // ValidateRegsiters validate exposed columns do match filter names -func validateRegisters() { +func validateRegisters() error { var i = ItemOut{} var filters = []string{"match", "contains", "startswith"} for _, c := range i.Columns() { for _, f := range filters { if _, ok := RegisterFuncMap[f+"-"+c]; !ok { - log.Fatal(c + " is missing in RegisterMap") + return errors.New(c + " is missing in RegisterMap") } } } + return nil } +{{bitArrayGetters}} + + func init() { RegisterFuncMap = make(registerFuncType) RegisterGroupBy = make(registerGroupByFunc) RegisterGetters = make(registerGettersMap) RegisterReduce = make(registerReduce) + RegisterBitArray = make(registerBitArray) // register search filter. //RegisterFuncMap["search"] = 'EDITYOURSELF' diff --git a/extras/templates/registerFilters.jinja2 b/extras/templates/registerFilters.jinja2 index 299c12c..24f4264 100644 --- a/extras/templates/registerFilters.jinja2 +++ b/extras/templates/registerFilters.jinja2 @@ -1,8 +1,11 @@ - //register filters for {{column}} - RegisterFuncMap["match-{{co}}"] = Filter{{column}}Match - RegisterFuncMap["contains-{{co}}"] = Filter{{column}}Contains - RegisterFuncMap["startswith-{{co}}"] = Filter{{column}}StartsWith - RegisterGetters["{{co}}"] = Getters{{column}} - RegisterGroupBy["{{co}}"] = Getters{{column}} + //register filters for {{columnName}} + RegisterFuncMap["match-{{co}}"] = Filter{{columnName}}Match + RegisterFuncMap["contains-{{co}}"] = Filter{{columnName}}Contains + RegisterFuncMap["startswith-{{co}}"] = Filter{{columnName}}StartsWith + RegisterGetters["{{co}}"] = Getters{{columnName}} + RegisterGroupBy["{{co}}"] = Getters{{columnName}} +{% if bitarray %} + RegisterBitArray["{{co}}"] = GetBitArray{{columnName}} +{% endif %} diff --git a/extras/templates/shrinkVars.jinja2 b/extras/templates/shrinkVars.jinja2 index bdc61fe..11684a1 100644 --- a/extras/templates/shrinkVars.jinja2 
+++ b/extras/templates/shrinkVars.jinja2 @@ -2,4 +2,7 @@ var {{column}}Tracker uint16 var {{column}}IdxMap fieldIdxMap var {{column}} fieldMapIdx +{% if bitarray %} +var {{column}}Items fieldItemsMap +{% endif %} diff --git a/extras/templates/storebitarray.template.jinja2 b/extras/templates/storebitarray.template.jinja2 new file mode 100644 index 0000000..f9494b1 --- /dev/null +++ b/extras/templates/storebitarray.template.jinja2 @@ -0,0 +1,9 @@ + + // Column {{columnName}} has byte arrays for + ba, ok = {{columnName}}Items[i.{{columnName}}] + if !ok { + ba = bitarray.NewSparseBitArray() + {{columnName}}Items[i.{{columnName}}] = ba + } + + ba.SetBit(uint64(i.Label)) diff --git a/operations.go b/operations.go index 2f10989..1041655 100644 --- a/operations.go +++ b/operations.go @@ -5,6 +5,10 @@ import ( "fmt" "net/http" "net/url" + + // "reflect" + "errors" + "log" "sort" "log" @@ -522,6 +526,7 @@ func bitArrayFilter( if len(combinedBitArrays) > 0 { bitArrayResult = combinedBitArrays[0] } else { + log.Println("no bitarrays found") return nil, errors.New("no bitarray found") } From fda371ff8875b825574086898d1fada5c4924ac9 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Fri, 5 Feb 2021 00:27:23 +0100 Subject: [PATCH 28/54] add custom groupby --- operations.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/operations.go b/operations.go index 1041655..2f4df98 100644 --- a/operations.go +++ b/operations.go @@ -203,12 +203,12 @@ func parseURLParameters(r *http.Request) (Query, error) { parameter, found := urlItems["groupby"] if found && parameter[0] != "" { - _, funcFound := RegisterGroupBy[parameter[0]] - if !funcFound { - return Query{}, errors.New("Invalid groupby parameter") + _, funcFound1 := RegisterGroupBy[parameter[0]] + _, funcFound2 := RegisterGroupByCustom[parameter[0]] + if !funcFound1 && !funcFound2 { + return Query{}, errors.New("invalid groupby parameter") } groupBy = parameter[0] - } // Check and validate reduce parameter 
From 0d525a52fae97b80622ec725af4a0858dd439468 Mon Sep 17 00:00:00 2001 From: Stephan Preeker Date: Fri, 5 Feb 2021 00:52:29 +0100 Subject: [PATCH 29/54] add missing return --- http_handlers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/http_handlers.go b/http_handlers.go index 6d1cd55..eeefc44 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -130,7 +130,7 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group return } - // Cache group by repsonse + // Cache group-by reduce repsonse cacheLock.Lock() cacheKey, _ := query.CacheKey() GroupByBodyCache[cacheKey] = result From b893f602a052923cc1fae02ddeb48891ce16321e Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Wed, 10 Feb 2021 22:38:22 +0100 Subject: [PATCH 30/54] validated and fixed reason for missing schools --- extras/create_model.py | 570 ++++++++++++++++++++------------------ extras/create_model_v2.py | 315 --------------------- 2 files changed, 304 insertions(+), 581 deletions(-) delete mode 100644 extras/create_model_v2.py diff --git a/extras/create_model.py b/extras/create_model.py index 19147ae..31a2177 100644 --- a/extras/create_model.py +++ b/extras/create_model.py @@ -1,277 +1,315 @@ -### First version is going to assume everything is a string -### also known as string theory:p - -### column with the name "value" or "index" will be used as index -### else the first column will be set as index, when index is enabled. -### this can be changed later in the generated model.go file +# -*- coding: utf-8 -*- +""" +Load first rows from csv, ask some questions +and generate a models.go to jumpstart +your lambda_db project for the given csv file -import csv -import sys -from filereader import create_reader, supported_fileformats +models.go contains all the field information +and functions of rows in your data. +- Repeated option to store repeated + values in a map and each individual items + only stores uint16 reference to map key. 
-def create_struct(item): - start = "type Item struct {\n" - # TODO add type - lines= [f'{k.capitalize()} string `json:"{k.lower()}"`' for k, v in item.items()] - stop = "\n}\n" - return start + "\n".join(lines) + stop +- BitArray option which is like Repeated + value but also creates a map[key]bitmap for all + items containing field value. Makes it possible + to do fast 'match' lookups. -def create_columns(item): - start = """ - func (i Item) Columns() []string { - return []string{ - """ - lines = [f'"{k.lower()}",' for k in item.keys()] - stop = """\n}\n}""" - return start + "\n".join(lines) + stop +python create_model.py your.csv +""" +import csv +import sys -def create_row(item): - start = """ - func (i Item) Row() []string { - return []string{ +from re import sub +from jinja2 import Environment, FileSystemLoader + +if '-f' in sys.argv: + filename = str(sys.argv[sys.argv.index('-f')+1]) +else: + filename = "items.csv" + +with open(filename) as f: + reader = csv.DictReader(f) + row = dict(next(reader)) + +env = Environment( + loader=FileSystemLoader('./templates'), +) + +# keep track of all column names and all original names in csv +allcolumns = [] +allcolumns_org = [] +repeated = [] +repeated_org = [] +bitarray = [] +bitarray_org = [] +unique = [] +unique = [] +unique_org = [] +ignored = [] +ignored_org = [] +geocolumns = [] +geocolumns_org = [] + + +def gocamelCase(string): + """convert string to camelCase + + woning_type -> WoningType """ - lines = [f"i.{k.capitalize()}," for k in item.keys()] - stop = """\n}\n}""" - return start + "\n".join(lines) + stop - - -def get_index_column(item): - special_columns = ["value", "index"] - for column in special_columns: - if column in item: - return column - - # we tried, let's return the first column - n = iter(item.keys()) - return next(n) - - -def create_getindex(item): - index_column = get_index_column(item) - start = """ - func (i Item) GetIndex() string { - return """ - middle = f"i.{index_column.capitalize()}" - 
stop = """\n}""" - return start + middle + stop - - -def create_filter_contains(column): - return ( - f"func Filter{column.capitalize()}Contains(i *Item, s string) bool" + "{" + "\n" - f"return strings.Contains(i.{column.capitalize()}, s)" - "\n" + "}" - ) - -def create_filter_startswith(column): - return ( - f"func Filter{column.capitalize()}StartsWith(i *Item, s string) bool" + "{" + "\n" - f"return strings.HasPrefix(i.{column.capitalize()}, s)" - "\n" + "}" - ) - -def create_filter_match(column): - return ( - f"func Filter{column.capitalize()}Match(i *Item, s string) bool" + "{" + "\n" - f"return i.{column.capitalize()} == s" - "\n" + "}" - ) - - -def create_getter(column): - return ( - f"func Getters{column.capitalize()}(i *Item) string" + "{" + "\n" - f"return i.{column.capitalize()}" - "\n" + "}" + string = sub(r"(_|-)+", " ", string).title().replace(" ", "") + return string + + +# ask some questions about columns. +index = 0 +for k in row.keys(): + + # go camelcase column names + kc = gocamelCase(k) + + options = ['r', 'u', 'i', 'g', 'b'] + while True: + # keep asking for valid input + q1 = ( + "(R)epeated value? has less then (2^16=65536) option.", + "(B)itarray, repeated column optimized for fast match.", + "(U)nique, (G)eo lat/lon point or (I)gnore ? r/b/u/g/i?." 
+ ) + action = input(f"idx:{index} is {k} {q1}") # noqa + if action == '': + print(f"pick one from {options}") + continue + if action not in options: + continue + break + + if action == 'r': + repeated.append(kc) + repeated_org.append(k) + elif action == 'u': + unique.append(kc) + unique_org.append(k) + elif action == 'i': + ignored.append(kc) + ignored_org.append(k) + elif action == 'g': + geocolumns.append(kc) + geocolumns_org.append(k) + unique.append(kc) + unique_org.append(k) + elif action == 'b': + # same as repeated but with some extra bitarray stuff + repeated.append(kc) + repeated_org.append(k) + bitarray.append(kc) + bitarray_org.append(k) + else: + print('invalid input') + sys.exit(-1) + + allcolumns.append(kc) + allcolumns_org.append(k) + index += 1 + +# ask for a index column +while True: + # keep asking for valid input + index = input(f"which column is idx? 0 - {len(allcolumns) - 1} ") + try: + index = int(index) + + if allcolumns[index] in ignored: + print('Selected an ignored column for index') + raise ValueError + + if -1 < index < len(allcolumns): + break + + except ValueError: + continue + + print('try again..') + +# setup initial data structs for each repeated column +initRepeatColumns = [] +initColumntemplate = env.get_template('initColumn.template.jinja2') + +for c in repeated: + initRepeatColumns.append( + initColumntemplate.render( + columnName=c, bitarraymap=c in bitarray) ) +# create bitarrays with item labels for column values. 
+bitArrayStores = [] +bitArrayGetters = [] +bitArrayStoreTemplate = env.get_template('storebitarray.template.jinja2') +bitArrayGetTemplate = env.get_template('bitarrayGetter.template.jinja2') +for r in bitarray: + bitArrayStores.append(bitArrayStoreTemplate.render(columnName=r)) + bitArrayGetters.append(bitArrayGetTemplate.render(columnName=r)) + +# create ItemFull struct fields +columnsItemIn = [] +jsonColumn = env.get_template('itemFullColumn.jinja2') +for c1, c2 in zip(allcolumns, allcolumns_org): + onerow = jsonColumn.render(c1=c1, c2=c2) + columnsItemIn.append(onerow) + +# create ItemFull struct fields +columnsItemOut = [] +jsonColumn = env.get_template('itemFullColumn.jinja2') +for c1, c2 in zip(allcolumns, allcolumns_org): + + if c1 in ignored: + continue + + onerow = jsonColumn.render(c1=c1, c2=c2) + columnsItemOut.append(onerow) + +# create Item struct fields +columnsItem = [] +for c1, c2 in zip(allcolumns, allcolumns_org): + + if c1 in ignored: + continue -def create_reduce(column): - return """ - func reduceCount(items Items) map[string]string { - result := make(map[string]string) - result["count"] = strconv.Itoa(len(items)) - return result -} - """ - -def create_init_register(): - return """ - RegisterFuncMap = make(registerFuncType) - RegisterGroupBy = make(registerGroupByFunc) - RegisterGetters = make(registerGettersMap) - RegisterReduce = make(registerReduce) - - """ - -def create_register_match_func(column): - return f'RegisterFuncMap["match-{column.lower()}"] = Filter{column.capitalize()}Match' - - -def create_register_contains_func(column): - return f'RegisterFuncMap["contains-{column.lower()}"] = Filter{column.capitalize()}Contains' - - -def create_register_startswith_func(column): - return f'RegisterFuncMap["startswith-{column.lower()}"] = Filter{column.capitalize()}StartsWith' - - -def create_register_getter(column): - return f'RegisterGetters["{column.lower()}"] = Getters{column.capitalize()}' - - -def create_register_groupby(column): - return 
f'RegisterGroupBy["{column.lower()}"] = Getters{column.capitalize()}' - - -def create_register_reduce(column): - return 'RegisterReduce["count"] = reduceCount' - - -def create_grouped(): - return """ -type GroupedOperations struct { - Funcs registerFuncType - GroupBy registerGroupByFunc - Getters registerGettersMap - Reduce registerReduce -} - -var Operations GroupedOperations - -var RegisterFuncMap registerFuncType -var RegisterGroupBy registerGroupByFunc -var RegisterGetters registerGettersMap -var RegisterReduce registerReduce -""" - -def create_sortby_line_plus(column): - return f'"{column.lower()}"' + ": func(i, j int) bool { return " + f"items[i].{column.capitalize()} < items[j].{column.capitalize()} " + " }," - -def create_sortby_line_minus(column): - return f'"-{column.lower()}"' + ": func(i, j int) bool { return " + f"items[i].{column.capitalize()} > items[j].{column.capitalize()} " + " }," - -def create_sortby(row): - start = """func sortBy(items Items, sortingL []string) (Items, []string) { - sortFuncs := map[string]func(int, int) bool{""" - lines = [] - for k in row.keys(): - lines.append(create_sortby_line_plus(k)) - lines.append(create_sortby_line_minus(k)) - lines.append("\n") - lines.append("}") - end = """ - for _, sortFuncName := range sortingL { - sortFunc := sortFuncs[sortFuncName] - sort.Slice(items, sortFunc) - } - // TODO must be nicer way - keys := []string{} - for key := range sortFuncs { - keys = append(keys, key) - } - - return items, keys - }""" - return start + "\n".join(lines) + end - -if __name__ == "__main__": - - filename = str(sys.argv[sys.argv.index('-f')+1]) if '-f' in sys.argv else "items.csv" - file_format = str(sys.argv[sys.argv.index('-format')+1]) if '-format' in sys.argv else "csv" - - if file_format not in supported_fileformats(): - print(f"{file_format} not part of supported file formats {','.join(supported_fileformats())}") - sys.exit() - - with open(filename) as f: - reader = create_reader(f, file_format) - row = 
dict(next(reader)) - - print("package main") - print() - - print("import (") - print('"sort"') - print('"strconv"') - print('"strings"') - print(")") - print(create_struct(row)) - print() - print(create_columns(row)) - print() - print(create_row(row)) - print() - print(create_getindex(row)) - print() - - print("// contain filters") - for k in row.keys(): - print(create_filter_contains(k)) - - print() - print("// startswith filters") - for k in row.keys(): - print(create_filter_startswith(k)) - - print() - print("// match filters") - for k in row.keys(): - print(create_filter_match(k)) - - print() - print("// reduce functions") - print(create_reduce(None)) - - print() - print("// getters") - for k in row.keys(): - print(create_getter(k)) - print() - - - print(create_grouped()) - print("func init() {") - print(create_init_register()) - - print() - print("// register match filters") - for k in row.keys(): - print(create_register_match_func(k)) - - print() - print("// register contains filters") - for k in row.keys(): - print(create_register_contains_func(k)) - - print() - print("// register startswith filters") - for k in row.keys(): - print(create_register_startswith_func(k)) - print() - - print() - print("// register getters ") - for k in row.keys(): - print(create_register_getter(k)) - print() - - print() - print("// register groupby ") - for k in row.keys(): - print(create_register_groupby(k)) - print() - - - print() - print("// register reduce functions") - print(create_register_reduce(None)) - - print("}") - - print(create_sortby(row)) - print() - + onerow = f"\t{c1} string\n" + if c1 in repeated: + onerow = f"\t{c1} uint16\n" + columnsItem.append(onerow) + + +# create Shrink code for repeated fields +# where we map uint16 to a string value. 
+shrinkVars = [] +shrinkItems = [] +shrinkvartemplate = env.get_template('shrinkVars.jinja2') +shrinktemplate = env.get_template('shrinkColumn.jinja2') +for c in repeated: + shrinkVars.append( + shrinkvartemplate.render(column=c, bitarray=c in bitarray)) + shrinkItems.append(shrinktemplate.render(column=c)) + + +# create the actual shrinked/expand Item fields. +shrinkItemFields = [] +expandItemFields = [] + +for c in allcolumns: + + if c in ignored: + continue + + if c in repeated: + # string to unint + shrinkItemFields.append(f"\t\t{c}IdxMap[i.{c}],\n") + # unint back to string + expandItemFields.append(f"\t\t{c}[i.{c}],\n") + else: + shrinkItemFields.append(f"\t\ti.{c},\n") + expandItemFields.append(f"\t\ti.{c},\n") + + +# ItemIn Columns +inColumns = [] +for c in allcolumns_org: + inColumns.append(f'\t\t"{c}",\n') + +# ItemOut Columns +outColumns = [] +for cc, c in zip(allcolumns, allcolumns_org): + # cc CamelCaseColumn. + if cc in ignored: + continue + outColumns.append(f'\t\t"{c}",\n') + +# create column filters. 
+# match, startswith, contains etc + +columnFilters = [] +filtertemplate = env.get_template("filters.jinja2") + +for c in allcolumns: + if c in ignored: + continue + + lookup = f"i.{c}" + if c in repeated: + lookup = f"{c}[i.{c}]" + + txt = filtertemplate.render(column=c, lookup=lookup) + columnFilters.append(txt) + +registerFilters = [] +rtempl = env.get_template('registerFilters.jinja2') +# register filters +for c, co in zip(allcolumns, allcolumns_org): + if c in ignored: + continue + txt = rtempl.render(co=co, columnName=c, bitarray=c in bitarray) + registerFilters.append(txt) + +sortColumns = [] +sortTemplate = env.get_template('sortfunc.jinja2') + +# create sort functions +for c, co in zip(allcolumns, allcolumns_org): + if c in ignored: + continue + + c1 = f"items[i].{c} < items[j].{c}" + c2 = f"items[i].{c} > items[j].{c}" + + if c in repeated: + c1 = f"{c}[items[i].{c}] < {c}[items[j].{c}]" + c2 = f"{c}[items[i].{c}] > {c}[items[j].{c}]" + + txt = sortTemplate.render(co=co, c1=c1, c2=c2) + sortColumns.append(txt) + + +csv_columns = [] +for c in allcolumns: + csv_columns.append(f'\t"{c}",\n') + + +# Finally render the model.go template +modeltemplate = env.get_template('model.template.jinja2') + +geometryGetter = '""' +print('GEOCOLUMNS: ' + " ".join(geocolumns)) +if len(geocolumns) == 1: + geometryGetter = f"Getters{geocolumns[0]}(&i)" + +output = modeltemplate.render( + initRepeatColumns=''.join(initRepeatColumns), + columnsItemIn=''.join(columnsItemIn), + columnsItemOut=''.join(columnsItemOut), + columnsItem=''.join(columnsItem), + shrinkVars=''.join(shrinkVars), + shrinkItems=''.join(shrinkItems), + shrinkItemFields=''.join(shrinkItemFields), + expandItemFields=''.join(expandItemFields), + csv_columns=''.join(csv_columns), + inColumns=''.join(inColumns), + outColumns=''.join(outColumns), + columnFilters=''.join(columnFilters), + registerFilters=''.join(registerFilters), + sortColumns=''.join(sortColumns), + indexcolumn=allcolumns[index], + 
geometryGetter=geometryGetter, + bitArrayStores=''.join(bitArrayStores), + bitArrayGetters=''.join(bitArrayGetters), +) + +f = open('model.go', 'w') +f.write(output) +f.close() + +print('saved in model.go') +print('!!NOTE!! edit the default search filter') diff --git a/extras/create_model_v2.py b/extras/create_model_v2.py deleted file mode 100644 index 31a2177..0000000 --- a/extras/create_model_v2.py +++ /dev/null @@ -1,315 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Load first rows from csv, ask some questions -and generate a models.go to jumpstart -your lambda_db project for the given csv file - -models.go contains all the field information -and functions of rows in your data. - -- Repeated option to store repeated - values in a map and each individual items - only stores uint16 reference to map key. - -- BitArray option which is like Repeated - value but also creates a map[key]bitmap for all - items containing field value. Makes it possible - to do fast 'match' lookups. - - -python create_model.py your.csv -""" - -import csv -import sys - -from re import sub -from jinja2 import Environment, FileSystemLoader - -if '-f' in sys.argv: - filename = str(sys.argv[sys.argv.index('-f')+1]) -else: - filename = "items.csv" - -with open(filename) as f: - reader = csv.DictReader(f) - row = dict(next(reader)) - -env = Environment( - loader=FileSystemLoader('./templates'), -) - -# keep track of all column names and all original names in csv -allcolumns = [] -allcolumns_org = [] -repeated = [] -repeated_org = [] -bitarray = [] -bitarray_org = [] -unique = [] -unique = [] -unique_org = [] -ignored = [] -ignored_org = [] -geocolumns = [] -geocolumns_org = [] - - -def gocamelCase(string): - """convert string to camelCase - - woning_type -> WoningType - """ - string = sub(r"(_|-)+", " ", string).title().replace(" ", "") - return string - - -# ask some questions about columns. 
-index = 0 -for k in row.keys(): - - # go camelcase column names - kc = gocamelCase(k) - - options = ['r', 'u', 'i', 'g', 'b'] - while True: - # keep asking for valid input - q1 = ( - "(R)epeated value? has less then (2^16=65536) option.", - "(B)itarray, repeated column optimized for fast match.", - "(U)nique, (G)eo lat/lon point or (I)gnore ? r/b/u/g/i?." - ) - action = input(f"idx:{index} is {k} {q1}") # noqa - if action == '': - print(f"pick one from {options}") - continue - if action not in options: - continue - break - - if action == 'r': - repeated.append(kc) - repeated_org.append(k) - elif action == 'u': - unique.append(kc) - unique_org.append(k) - elif action == 'i': - ignored.append(kc) - ignored_org.append(k) - elif action == 'g': - geocolumns.append(kc) - geocolumns_org.append(k) - unique.append(kc) - unique_org.append(k) - elif action == 'b': - # same as repeated but with some extra bitarray stuff - repeated.append(kc) - repeated_org.append(k) - bitarray.append(kc) - bitarray_org.append(k) - else: - print('invalid input') - sys.exit(-1) - - allcolumns.append(kc) - allcolumns_org.append(k) - index += 1 - -# ask for a index column -while True: - # keep asking for valid input - index = input(f"which column is idx? 0 - {len(allcolumns) - 1} ") - try: - index = int(index) - - if allcolumns[index] in ignored: - print('Selected an ignored column for index') - raise ValueError - - if -1 < index < len(allcolumns): - break - - except ValueError: - continue - - print('try again..') - -# setup initial data structs for each repeated column -initRepeatColumns = [] -initColumntemplate = env.get_template('initColumn.template.jinja2') - -for c in repeated: - initRepeatColumns.append( - initColumntemplate.render( - columnName=c, bitarraymap=c in bitarray) - ) - -# create bitarrays with item labels for column values. 
-bitArrayStores = [] -bitArrayGetters = [] -bitArrayStoreTemplate = env.get_template('storebitarray.template.jinja2') -bitArrayGetTemplate = env.get_template('bitarrayGetter.template.jinja2') -for r in bitarray: - bitArrayStores.append(bitArrayStoreTemplate.render(columnName=r)) - bitArrayGetters.append(bitArrayGetTemplate.render(columnName=r)) - -# create ItemFull struct fields -columnsItemIn = [] -jsonColumn = env.get_template('itemFullColumn.jinja2') -for c1, c2 in zip(allcolumns, allcolumns_org): - onerow = jsonColumn.render(c1=c1, c2=c2) - columnsItemIn.append(onerow) - -# create ItemFull struct fields -columnsItemOut = [] -jsonColumn = env.get_template('itemFullColumn.jinja2') -for c1, c2 in zip(allcolumns, allcolumns_org): - - if c1 in ignored: - continue - - onerow = jsonColumn.render(c1=c1, c2=c2) - columnsItemOut.append(onerow) - -# create Item struct fields -columnsItem = [] -for c1, c2 in zip(allcolumns, allcolumns_org): - - if c1 in ignored: - continue - - onerow = f"\t{c1} string\n" - if c1 in repeated: - onerow = f"\t{c1} uint16\n" - columnsItem.append(onerow) - - -# create Shrink code for repeated fields -# where we map uint16 to a string value. -shrinkVars = [] -shrinkItems = [] -shrinkvartemplate = env.get_template('shrinkVars.jinja2') -shrinktemplate = env.get_template('shrinkColumn.jinja2') -for c in repeated: - shrinkVars.append( - shrinkvartemplate.render(column=c, bitarray=c in bitarray)) - shrinkItems.append(shrinktemplate.render(column=c)) - - -# create the actual shrinked/expand Item fields. 
-shrinkItemFields = [] -expandItemFields = [] - -for c in allcolumns: - - if c in ignored: - continue - - if c in repeated: - # string to unint - shrinkItemFields.append(f"\t\t{c}IdxMap[i.{c}],\n") - # unint back to string - expandItemFields.append(f"\t\t{c}[i.{c}],\n") - else: - shrinkItemFields.append(f"\t\ti.{c},\n") - expandItemFields.append(f"\t\ti.{c},\n") - - -# ItemIn Columns -inColumns = [] -for c in allcolumns_org: - inColumns.append(f'\t\t"{c}",\n') - -# ItemOut Columns -outColumns = [] -for cc, c in zip(allcolumns, allcolumns_org): - # cc CamelCaseColumn. - if cc in ignored: - continue - outColumns.append(f'\t\t"{c}",\n') - -# create column filters. -# match, startswith, contains etc - -columnFilters = [] -filtertemplate = env.get_template("filters.jinja2") - -for c in allcolumns: - if c in ignored: - continue - - lookup = f"i.{c}" - if c in repeated: - lookup = f"{c}[i.{c}]" - - txt = filtertemplate.render(column=c, lookup=lookup) - columnFilters.append(txt) - -registerFilters = [] -rtempl = env.get_template('registerFilters.jinja2') -# register filters -for c, co in zip(allcolumns, allcolumns_org): - if c in ignored: - continue - txt = rtempl.render(co=co, columnName=c, bitarray=c in bitarray) - registerFilters.append(txt) - -sortColumns = [] -sortTemplate = env.get_template('sortfunc.jinja2') - -# create sort functions -for c, co in zip(allcolumns, allcolumns_org): - if c in ignored: - continue - - c1 = f"items[i].{c} < items[j].{c}" - c2 = f"items[i].{c} > items[j].{c}" - - if c in repeated: - c1 = f"{c}[items[i].{c}] < {c}[items[j].{c}]" - c2 = f"{c}[items[i].{c}] > {c}[items[j].{c}]" - - txt = sortTemplate.render(co=co, c1=c1, c2=c2) - sortColumns.append(txt) - - -csv_columns = [] -for c in allcolumns: - csv_columns.append(f'\t"{c}",\n') - - -# Finally render the model.go template -modeltemplate = env.get_template('model.template.jinja2') - -geometryGetter = '""' -print('GEOCOLUMNS: ' + " ".join(geocolumns)) -if len(geocolumns) == 1: - 
geometryGetter = f"Getters{geocolumns[0]}(&i)" - -output = modeltemplate.render( - initRepeatColumns=''.join(initRepeatColumns), - columnsItemIn=''.join(columnsItemIn), - columnsItemOut=''.join(columnsItemOut), - columnsItem=''.join(columnsItem), - shrinkVars=''.join(shrinkVars), - shrinkItems=''.join(shrinkItems), - shrinkItemFields=''.join(shrinkItemFields), - expandItemFields=''.join(expandItemFields), - csv_columns=''.join(csv_columns), - inColumns=''.join(inColumns), - outColumns=''.join(outColumns), - columnFilters=''.join(columnFilters), - registerFilters=''.join(registerFilters), - sortColumns=''.join(sortColumns), - indexcolumn=allcolumns[index], - geometryGetter=geometryGetter, - bitArrayStores=''.join(bitArrayStores), - bitArrayGetters=''.join(bitArrayGetters), -) - -f = open('model.go', 'w') -f.write(output) -f.close() - -print('saved in model.go') -print('!!NOTE!! edit the default search filter') From 280b26ada20a6502596dc6c365a1a10e2eef8eb6 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Mon, 15 Feb 2021 21:33:19 +0100 Subject: [PATCH 31/54] add woning equivalent reduce --- model.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/model.go b/model.go index e2e53d2..046aafe 100644 --- a/model.go +++ b/model.go @@ -1471,6 +1471,20 @@ func reduceCount(items Items) map[string]string { return result } +func reduceWEQ(items Items) map[string]string { + result := make(map[string]string) + weq := 0 + for i := range items { + _weq, err := strconv.ParseInt(items[i].Woningequivalent, 10, 64) + if err != nil { + panic(err) + } + weq += int(_weq) + } + result["woningenquivalent"] = strconv.Itoa(weq) + return result +} + type GroupedOperations struct { Funcs registerFuncType GroupBy registerGroupByFunc From f6e924f4618a1e8cd56d70db7fa81be9d7bf826e Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Mon, 15 Feb 2021 22:02:51 +0100 Subject: [PATCH 32/54] try pgzip --- csv.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/csv.go b/csv.go index c9c1c98..43161d1 100644 --- a/csv.go +++ b/csv.go @@ -1,12 +1,12 @@ package main import ( - "compress/gzip" "encoding/json" "errors" "fmt" csv "github.com/JensRantil/go-csv" "github.com/cheggaaa/pb" + "github.com/klauspost/pgzip" "io" "log" "os" @@ -142,7 +142,7 @@ func importCSV(filename string, itemChan ItemsChannel, defer file.Close() bar = NewProgressBar(file) - fz, err := gzip.NewReader(io.TeeReader(file, bar)) + fz, err := pgzip.NewReader(io.TeeReader(file, bar)) if err != nil { return err From ba495a2a1407927e6b749f4e0e91c570c888989d Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Wed, 17 Feb 2021 16:24:08 +0100 Subject: [PATCH 33/54] added bouwjaar --- model.go | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/model.go b/model.go index 046aafe..e2e53d2 100644 --- a/model.go +++ b/model.go @@ -1471,20 +1471,6 @@ func reduceCount(items Items) map[string]string { return result } -func reduceWEQ(items Items) map[string]string { - result := make(map[string]string) - weq := 0 - for i := range items { - _weq, err := strconv.ParseInt(items[i].Woningequivalent, 10, 64) - if err != nil { - panic(err) - } - weq += int(_weq) - } - result["woningenquivalent"] = strconv.Itoa(weq) - return result -} - type GroupedOperations struct { Funcs registerFuncType GroupBy registerGroupByFunc From fff9bcb1e391511171220a5558d0eda26f492d70 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Wed, 17 Feb 2021 17:31:39 +0100 Subject: [PATCH 34/54] added readlock, moved custom code --- custom.go | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 custom.go diff --git a/custom.go b/custom.go new file mode 100644 index 0000000..aa432db --- /dev/null +++ b/custom.go @@ -0,0 +1,30 @@ +package main + +import ( + "strconv" +) + +type registerCustomGroupByFunc map[string]func(*Item, ItemsGroupedBy) + +var RegisterGroupByCustom registerCustomGroupByFunc + +func init() { + + RegisterGroupByCustom = 
make(registerCustomGroupByFunc) + RegisterGroupByCustom["gebruiksdoelen-mixed"] = GroupByGettersGebruiksdoelen + +} + +func reduceWEQ(items Items) map[string]string { + result := make(map[string]string) + weq := 0 + for i := range items { + _weq, err := strconv.ParseInt(items[i].Woningequivalent, 10, 64) + if err != nil { + panic(err) + } + weq += int(_weq) + } + result["woningenquivalent"] = strconv.Itoa(weq) + return result +} From aac4b78c9b984e843910a3619b06a184dba09589 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 9 Mar 2021 10:28:28 +0100 Subject: [PATCH 35/54] update readme --- README.md | 45 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 575b209..91688c4 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,36 @@ # LambdaDB In memory database that uses filters to get the data you need. +Lambda DB has a tiny codebase which does a lot +Lambda is not ment as a persistance storage or a replacement for a traditional +Database but as fast analytics engine cache representation engine. + +powers: https://dego.vng.nl + +## Properties: + +- Insanely fast API. 1ms respsonses +- Fast to setup. +- Easy to deploy. +- Easy to customize. +- Easy export data + +- Implement custom authorized filters. + +## Indexes + +- S2 geoindex for fast point lookup +- Bitarrays +- Mapping + +- Your own special needs indexes! + +## Flow: + +Generate a model and load your data. +The API is generated from your model. +Deploy. + +Condition: Your dataset must fit in memory. Can be used for your needs by changing the `models.go` file to your needs. Creating and registering of the functionality that is needed. @@ -8,6 +39,7 @@ Creating and registering of the functionality that is needed. ### Steps You can start the database with only a csv. Go over steps below, And see the result in your browser. + 1. place csv file, in dir extras. 2. `python3 create_model_v2.py` answer the questions.. 3. 
go fmt model.go @@ -18,11 +50,9 @@ Go over steps below, And see the result in your browser. 9. curl 127.0.0.1:8128/help/ 10. browser 127.0.0.1:8128/ - 11. instructions curl 127.0.0.1:8128/help/ | python -m json.tool - ### Running sudo docker-compose up --no-deps --build @@ -30,3 +60,14 @@ sudo docker-compose up --no-deps --build promql {instance="lambdadb:8000"} python3 extras/ingestion.py -f movies_subset.tsv -format tsv -dbhost 127.0.0.1:8000 +======= + +1. instructions curl 127.0.0.1:8000/help/ | python -m json.tool + + +### TODO + +- load data directly from a database (periodic) +- use a remote source for CSV +- use some compression faster to load than gzip +- generate swagger API From 041b486b66289a6d1872031e16bbe7cd8216b478 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 9 Mar 2021 17:06:50 +0100 Subject: [PATCH 36/54] fix bug returning raw item json --- README.md | 7 ++++++- extras/templates/model.template.jinja2 | 3 +++ http_handlers.go | 9 ++++++++- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 91688c4..aac8537 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ You can start the database with only a csv. Go over steps below, And see the result in your browser. 1. place csv file, in dir extras. -2. `python3 create_model_v2.py` answer the questions.. +2. `python3 create_model_.py` answer the questions. 3. go fmt model.go 4. mv model.go ../ 5. go build @@ -64,10 +64,15 @@ python3 extras/ingestion.py -f movies_subset.tsv -format tsv -dbhost 127.0.0.1: 1. 
instructions curl 127.0.0.1:8000/help/ | python -m json.tool +### Questions + + ### TODO - load data directly from a database (periodic) +- document the `create_model.py` questions - use a remote source for CSV - use some compression faster to load than gzip - generate swagger API +- Add more tests diff --git a/extras/templates/model.template.jinja2 b/extras/templates/model.template.jinja2 index 63370e6..0afb6c8 100644 --- a/extras/templates/model.template.jinja2 +++ b/extras/templates/model.template.jinja2 @@ -102,6 +102,9 @@ type Item struct { } +func (i Item) MarshalJSON() ([]byte, error) { + return json.Marshal(i.Serialize()) +} // Shrink create smaller Item using uint16 func (i ItemIn) Shrink(label int) Item { diff --git a/http_handlers.go b/http_handlers.go index eeefc44..613df98 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -370,7 +370,7 @@ type searchResponse struct { MMeta *Meta `json:"meta"` } -func makeResp(items Items) searchResponse { +func outputItems(items Items) ItemsOut { itemsout := make(ItemsOut, 0, len(items)) @@ -379,6 +379,13 @@ func makeResp(items Items) searchResponse { itemsout = append(itemsout, &orgItem) } + return itemsout +} + +func makeResp(items Items) searchResponse { + + itemsout := outputItems(items) + fields := []ShowItem{} columns := ItemOut{}.Columns() for _, column := range columns { From 1c9f9d13313e099ec0f6c3b95ff9e883130807a6 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Wed, 10 Mar 2021 16:53:25 +0100 Subject: [PATCH 37/54] allow reduce without groupby --- geo.go | 6 +++--- http_handlers.go | 22 ++++++++++++++-------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/geo.go b/geo.go index 9419db1..86ab199 100644 --- a/geo.go +++ b/geo.go @@ -145,9 +145,9 @@ func SearchGeoItems(cu s2.CellUnion) labeledItems { cu.Normalize() - for i, c := range cu { - fmt.Printf("%d %s \n", i, c) - } + //for i, c := range cu { + // fmt.Printf("%d %s \n", i, c) + //} min := S2CELLS.Seek(cu[0].ChildBegin()) max := 
S2CELLS.Seek(cu[len(cu)-1].ChildEnd()) diff --git a/http_handlers.go b/http_handlers.go index 613df98..b36c884 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -3,7 +3,6 @@ package main import ( "encoding/csv" "encoding/json" - "errors" "fmt" "index/suffixarray" "log" @@ -46,7 +45,8 @@ func hanleQueryError(err error, w http.ResponseWriter) { json.NewEncoder(w).Encode(response) } -type GroupByResult map[string]map[string]string +type ReduceResult map[string]string +type GroupByResult map[string]ReduceResult var GroupByBodyCache = make(map[string]GroupByResult) var GroupByHeaderCache = make(map[string]HeaderData) @@ -99,6 +99,16 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group setHeader(items, w, query, queryTime) + // We want to count all filtered items. + // and we do not have a groupby + if query.GroupBy == "" && query.Reduce != "" { + reduceFunc, _ := operations.Reduce[query.Reduce] + result := reduceFunc(items) + json.NewEncoder(w).Encode(result) + return + } + + // no groupby return all rows if query.GroupBy == "" { if query.ReturnFormat == "csv" { writeCSV(items, w) @@ -110,17 +120,13 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group return } + // groupby items on column groupByItems := groupByRunner(items, query.GroupBy) items = nil if query.Reduce != "" { result := make(GroupByResult) - reduceFunc, reduceFuncFound := operations.Reduce[query.Reduce] - if !reduceFuncFound { - err = errors.New("invalid reduce parameter value") - hanleQueryError(err, w) - return - } + reduceFunc, _ := operations.Reduce[query.Reduce] for key, val := range groupByItems { result[key] = reduceFunc(val) } From 05a3627f6dbedf7b5235d377d8069799bf327a0f Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Mon, 15 Mar 2021 10:32:05 +0100 Subject: [PATCH 38/54] add header column to csv --- http_handlers.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/http_handlers.go b/http_handlers.go index 
b36c884..f95ede5 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -260,6 +260,11 @@ func makeIndex() { func writeCSV(items Items, w http.ResponseWriter) { writer := csv.NewWriter(w) + + columns := ItemOut{}.Columns() + writer.Write(columns) + writer.Flush() + for i := range items { writer.Write(items[i].Row()) writer.Flush() From c2ff2d7ce962d4c552eaa491383e8701b2eaff09 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Mon, 22 Mar 2021 15:06:26 +0100 Subject: [PATCH 39/54] working merged build. labeledItems renamed to Items --- geo.go | 10 ++++---- http_handlers.go | 16 ++++--------- main.go | 8 +++---- operations.go | 61 ++++++++++-------------------------------------- store.go | 5 ++-- 5 files changed, 27 insertions(+), 73 deletions(-) diff --git a/geo.go b/geo.go index 86ab199..78f2d36 100644 --- a/geo.go +++ b/geo.go @@ -123,12 +123,12 @@ func (i Item) GeoIndex(label int) error { } // Simple search algo -func SearchOverlapItems(items *labeledItems, cu s2.CellUnion) labeledItems { +func SearchOverlapItems(items *Items, cu s2.CellUnion) Items { s2Lock.RLock() defer s2Lock.RUnlock() - newItems := labeledItems{} + newItems := Items{} for k, i := range *items { if cu.ContainsCellID(S2CELLMAP[k]) { @@ -138,10 +138,10 @@ func SearchOverlapItems(items *labeledItems, cu s2.CellUnion) labeledItems { return newItems } -// Given only a cell Union return labeldItems -func SearchGeoItems(cu s2.CellUnion) labeledItems { +// Given only a cell Union return Items +func SearchGeoItems(cu s2.CellUnion) Items { - newItems := labeledItems{} + newItems := Items{} cu.Normalize() diff --git a/http_handlers.go b/http_handlers.go index f95ede5..80dcdf7 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "index/suffixarray" + //"io/ioutil" "log" "net/http" "runtime" @@ -168,7 +169,7 @@ func contextAddRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Groupe strictMode := SETTINGS.Get("strict-mode") == "y" for n, item := 
range items { - if (*item == Item{}) { + if (*item == ItemIn{}) { fmt.Printf("unable to process item %d of batch\n", n) if strictMode { fmt.Printf("strict mode stopping ingestion of batch\n") @@ -187,7 +188,7 @@ func rmRest(w http.ResponseWriter, r *http.Request) { ITEMS = make(Items, 0, 100*1000) msg := fmt.Sprint("removed items from database") fmt.Printf(WarningColorN, msg) - ITEMS = labeledItems{} + ITEMS = Items{} go func() { time.Sleep(1 * time.Second) @@ -307,12 +308,6 @@ func handleInputStorage(r *http.Request) (string, storageFunc, retrieveFunc, str storagefunc = STORAGEFUNCS[storagename] } - // TODO do not use ReadAll..but do it line by line - s, err := ioutil.ReadAll(fz) - if err != nil { - return - } - retrievefunc, found := RETRIEVEFUNCS[storagename] if !found { storagename := SETTINGS.Get("STORAGEMETHOD") @@ -320,15 +315,14 @@ func handleInputStorage(r *http.Request) (string, storageFunc, retrieveFunc, str } // empty exising ITEMS - ITEMS = labeledItems{} - json.Unmarshal(s, &ITEMS) + ITEMS = Items{} filename := fmt.Sprintf("%s.%s", FILENAME, storagename) msg := fmt.Sprint("Loaded new items in memory amount: ", len(ITEMS)) fmt.Printf(WarningColorN, msg) //makeIndex() - BuildGeoIndex() + //BuildGeoIndex() return storagename, storagefunc, retrievefunc, filename } diff --git a/main.go b/main.go index d694fd4..906e840 100644 --- a/main.go +++ b/main.go @@ -2,8 +2,8 @@ package main import ( "fmt" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" + //"github.com/prometheus/client_golang/prometheus" + //"github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/promhttp" "log" "net/http" // "runtime/debug" "github.com/pkg/profile") @@ -64,7 +64,7 @@ func loadcsv(itemChan ItemsChannel) { } func main() { - SETTINGS.Set("http_db_host", "0.0.0.0:8128", "host with port") + SETTINGS.Set("http_db_host", "0.0.0.0:8000", "host with port") 
SETTINGS.Set("SHAREDSECRET", "", "jwt shared secret") SETTINGS.Set("JWTENABLED", "y", "JWT enabled") @@ -91,8 +91,6 @@ func main() { SETTINGS.Parse() - // ITEMS = labeledItems{} - Operations = GroupedOperations{ Funcs: RegisterFuncMap, GroupBy: RegisterGroupBy, diff --git a/operations.go b/operations.go index 2f4df98..7cc67ec 100644 --- a/operations.go +++ b/operations.go @@ -4,14 +4,12 @@ import ( "encoding/json" "fmt" "net/http" - "net/url" // "reflect" "errors" "log" "sort" - "log" //"sort" "strconv" "strings" @@ -39,15 +37,6 @@ func (ft filterType) CacheKey() string { return strings.Join(filterlist, "-") } -func (ft filterType) CacheKey() string { - filterlist := []string{} - for k, v := range ft { - filterlist = append(filterlist, fmt.Sprintf("%s=%s", k, v)) - } - sort.Strings(filterlist) - return strings.Join(filterlist, "-") -} - type formatRespFunc func(w http.ResponseWriter, r *http.Request, items Items) type registerFormatMap map[string]formatRespFunc @@ -121,24 +110,6 @@ func (q Query) CacheKey() (string, error) { } -func decodeUrl(s string) string { - newS, err := url.QueryUnescape(s) - if err != nil { - return "", errors.New("bitarrays not cached") - } - - keys := []string{ - q.Filters.CacheKey(), - q.Excludes.CacheKey(), - q.Anys.CacheKey(), - q.GroupBy, - q.Reduce, - q.ReturnFormat, - } - - return strings.Join(keys, "-"), nil -} - // parseURLParameters checks parameters and builds a query to be run. 
func parseURLParameters(r *http.Request) (Query, error) { filterMap := make(filterType) @@ -158,9 +129,6 @@ func parseURLParameters(r *http.Request) (Query, error) { } } - // we can post gejson data - - urlItems := r.URL.Query() // parse params and body posts // (geo)json data r.ParseForm() @@ -200,8 +168,6 @@ func parseURLParameters(r *http.Request) (Query, error) { // Check and validate reduce parameter parameter, found = r.Form["reduce"] - parameter, found := urlItems["groupby"] - if found && parameter[0] != "" { _, funcFound1 := RegisterGroupBy[parameter[0]] _, funcFound2 := RegisterGroupByCustom[parameter[0]] @@ -281,9 +247,6 @@ func parseURLParameters(r *http.Request) (Query, error) { GroupBy: groupBy, Reduce: reduce, - GroupBy: groupBy, - Reduce: reduce, - Limit: limit, LimitGiven: limitGiven, @@ -395,7 +358,7 @@ func max(a, b int) int { return b } -func filteredEarlyExit(items *labeledItems, operations GroupedOperations, query Query) Items { +func filteredEarlyExit(items *Items, operations GroupedOperations, query Query) Items { registerFuncs := operations.Funcs filteredItems := make(Items, 0, len(*items)/4) @@ -414,7 +377,7 @@ func filteredEarlyExit(items *labeledItems, operations GroupedOperations, query lock.RLock() defer lock.RUnlock() - for _, item := range items { + for _, item := range *items { if !any(item, anys, registerFuncs) { continue } @@ -434,7 +397,7 @@ func filteredEarlyExit(items *labeledItems, operations GroupedOperations, query return filteredItems } -func filteredEarlyExitSingle(items *labeledItems, column string, operations GroupedOperations, query Query) []string { +func filteredEarlyExitSingle(items *Items, column string, operations GroupedOperations, query Query) []string { registerFuncs := operations.Funcs filteredItemsSet := make(map[string]bool) excludes := query.Excludes @@ -452,7 +415,7 @@ func filteredEarlyExitSingle(items *labeledItems, column string, operations Grou lock.RLock() defer lock.RUnlock() - for _, item := range 
items { + for _, item := range *items { if !any(item, anys, registerFuncs) { continue } @@ -492,9 +455,9 @@ func filteredEarlyExitSingle(items *labeledItems, column string, operations Grou // for columns with not so unique values it makes sense te create bitarrays. // to do fast bitwise operations. func bitArrayFilter( - items *labeledItems, + items *Items, operations GroupedOperations, - query Query) (labeledItems, error) { + query Query) (Items, error) { balock.RLock() defer balock.RUnlock() @@ -544,7 +507,7 @@ func bitArrayFilter( log.Fatal("something went wrong with bitarray..") } - newItems := make(labeledItems, 0) + newItems := make(Items, 0) labels := bitArrayResult.ToNums() for _, l := range labels { @@ -554,7 +517,7 @@ func bitArrayFilter( return newItems, nil } -func runQuery(items *labeledItems, query Query, operations GroupedOperations) (Items, int64) { +func runQuery(items *Items, query Query, operations GroupedOperations) (Items, int64) { start := time.Now() var newItems Items @@ -569,7 +532,7 @@ func runQuery(items *labeledItems, query Query, operations GroupedOperations) (I } } - var nextItems *labeledItems + var nextItems *Items filteredItems, err := bitArrayFilter(items, operations, query) if err != nil { @@ -579,7 +542,7 @@ func runQuery(items *labeledItems, query Query, operations GroupedOperations) (I } if query.IndexGiven && len(STR_INDEX) > 0 { - items = make(Items, 0) + items := make(Items, 0) indices := INDEX.Lookup([]byte(query.IndexQuery), -1) seen := make(map[string]bool) for _, idx := range indices { @@ -619,7 +582,7 @@ func runQuery(items *labeledItems, query Query, operations GroupedOperations) (I } func runTypeAheadQuery( - items *labeledItems, column string, query Query, + items *Items, column string, query Query, operations GroupedOperations) ([]string, int64) { start := time.Now() results := filteredEarlyExitSingle(items, column, operations, query) @@ -627,7 +590,7 @@ func runTypeAheadQuery( return results, int64(diff) / 
int64(1000000) } -func filtered(items *labeledItems, operations GroupedOperations, query Query) Items { +func filtered(items *Items, operations GroupedOperations, query Query) Items { registerFuncs := operations.Funcs filteredItems := make(Items, 0) excludes := query.Excludes diff --git a/store.go b/store.go index 750327c..0178701 100644 --- a/store.go +++ b/store.go @@ -5,7 +5,6 @@ import ( ) //Items -type labeledItems []*Item type Items []*Item type ItemsIn []*ItemIn type ItemsOut []*ItemOut @@ -13,11 +12,11 @@ type ItemsOut []*ItemOut type ItemsGroupedBy map[string]Items type ItemsChannel chan ItemsIn -var ITEMS labeledItems +var ITEMS Items var itemChan ItemsChannel func init() { - ITEMS = labeledItems{} + ITEMS = Items{} } func ItemChanWorker(itemChan ItemsChannel) { From d4a1ad69d68c009d13e9c049172504bd064152b9 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Mon, 22 Mar 2021 15:12:12 +0100 Subject: [PATCH 40/54] remove merge mistake --- operations.go | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/operations.go b/operations.go index 7cc67ec..4aada53 100644 --- a/operations.go +++ b/operations.go @@ -159,18 +159,7 @@ func parseURLParameters(r *http.Request) (Query, error) { if found && parameter[0] != "" { _, funcFound1 := RegisterGroupBy[parameter[0]] _, funcFound2 := RegisterGroupByCustom[parameter[0]] - if !funcFound1 && !funcFound2 { - return Query{}, errors.New("invalid groupby parameter") - } - groupBy = parameter[0] - } - // Check and validate reduce parameter - parameter, found = r.Form["reduce"] - - if found && parameter[0] != "" { - _, funcFound1 := RegisterGroupBy[parameter[0]] - _, funcFound2 := RegisterGroupByCustom[parameter[0]] if !funcFound1 && !funcFound2 { return Query{}, errors.New("invalid groupby parameter") } From ff690a74319551e4ec026f46afbd2ea94e6aa5fe Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Mon, 22 Mar 2021 17:48:07 +0100 Subject: [PATCH 41/54] remove merge mistake --- http_handlers.go | 8 -------- 1 
file changed, 8 deletions(-) diff --git a/http_handlers.go b/http_handlers.go index 80dcdf7..5cdf944 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -314,15 +314,8 @@ func handleInputStorage(r *http.Request) (string, storageFunc, retrieveFunc, str retrievefunc = RETRIEVEFUNCS[storagename] } - // empty exising ITEMS - ITEMS = Items{} - filename := fmt.Sprintf("%s.%s", FILENAME, storagename) - msg := fmt.Sprint("Loaded new items in memory amount: ", len(ITEMS)) - fmt.Printf(WarningColorN, msg) - //makeIndex() - //BuildGeoIndex() return storagename, storagefunc, retrievefunc, filename } @@ -341,7 +334,6 @@ func saveRest(w http.ResponseWriter, r *http.Request) { fmt.Println("unable to write file reason:", err) w.WriteHeader(500) return - } msg = fmt.Sprintf("filname %s, filesize: %d mb\n", filename, size/1024/1025) fmt.Printf(WarningColor, msg) From 7faf26bfa6f5f232ff2500f75e8e56bcb3504e30 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 20 Apr 2021 00:44:03 +0200 Subject: [PATCH 42/54] wip working new storage / retrieve methods --- geo.go | 9 +- http_handlers.go | 4 +- main.go | 7 +- model.go | 205 +------------------ model_maps.go | 444 ++++++++++++++++++++++++++++++++++++++++++ storage_operations.go | 101 ++++++---- store.go | 19 ++ 7 files changed, 539 insertions(+), 250 deletions(-) create mode 100644 model_maps.go diff --git a/geo.go b/geo.go index 78f2d36..e11d400 100644 --- a/geo.go +++ b/geo.go @@ -48,13 +48,16 @@ func (c s2CellIndex) Less(i, j int) bool { return c[i].ID < c[j].ID } var S2CELLS s2CellIndex var S2CELLMAP s2CellMap +func clearGeoIndex() { + S2CELLS = make(s2CellIndex, 0) + S2CELLMAP = s2CellMap{} +} + func init() { minLevel = 2 maxLevel = 21 maxCells = 450 - - S2CELLS = make(s2CellIndex, 0) - S2CELLMAP = s2CellMap{} + clearGeoIndex() } func BuildGeoIndex() { diff --git a/http_handlers.go b/http_handlers.go index 5cdf944..e960a1b 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -277,7 +277,7 @@ func loadRest(w 
http.ResponseWriter, r *http.Request) { msg := fmt.Sprintf("retrieving with: %s, with filename: %s", storagename, filename) fmt.Printf(WarningColorN, msg) - itemsAdded, err := retrievefunc(ITEMS, filename) + itemsAdded, err := retrievefunc(filename) if err != nil { log.Printf("could not open %s reason %s", filename, err) w.Write([]byte("500 - could not load data")) @@ -329,7 +329,7 @@ func saveRest(w http.ResponseWriter, r *http.Request) { msg = fmt.Sprintf("storage method: %s filename: %s\n", storagename, filename) fmt.Printf(WarningColor, msg) - size, err := storagefunc(ITEMS, filename) + size, err := storagefunc(filename) if err != nil { fmt.Println("unable to write file reason:", err) w.WriteHeader(500) diff --git a/main.go b/main.go index 906e840..45f6488 100644 --- a/main.go +++ b/main.go @@ -15,11 +15,6 @@ type jwtConfig struct { SharedSecret string } -type storageFunc func(Items, string) (int64, error) -type retrieveFunc func(Items, string) (int, error) -type storageFuncs map[string]storageFunc -type retrieveFuncs map[string]retrieveFunc - // Colors are fun, and can be used to note that this is joyfull and fun project. const ( InfoColor = "\033[1;34m%s\033[0m" @@ -81,7 +76,7 @@ func main() { SETTINGS.Set("strict-mode", "y", "strict mode does not allow ingestion of invalid items and will reject the batch") SETTINGS.Set("prometheus-monitoring", "n", "add promethues monitoring endpoint") - SETTINGS.Set("STORAGEMETHOD", "bytes", "Storagemethod available options are json, jsonz, bytes, bytesz") + SETTINGS.Set("STORAGEMETHOD", "bytesz", "Storagemethod available options are json, jsonz, bytes, bytesz") SETTINGS.Set("LOADATSTARTUP", "n", "Load data at startup. 
('y', 'n')") SETTINGS.Set("readonly", "yes", "only allow read only funcions") diff --git a/model.go b/model.go index e2e53d2..a6a8fcb 100644 --- a/model.go +++ b/model.go @@ -32,7 +32,6 @@ import ( "sort" "strconv" "strings" - "sync" "github.com/Workiva/go-datastructures/bitarray" ) @@ -47,204 +46,8 @@ type fieldIdxMap map[string]uint16 type fieldMapIdx map[uint16]string type fieldItemsMap map[uint16]bitarray.BitArray -// Column maps. -// Store for each non distinct/repeated column -// unit16 -> string map and -// string -> unit16 map -// track count of distinct values - -var WoningTypeTracker uint16 -var WoningTypeIdxMap fieldIdxMap -var WoningType fieldMapIdx - -var WoningTypeItems fieldItemsMap - -var LabelscoreVoorlopigTracker uint16 -var LabelscoreVoorlopigIdxMap fieldIdxMap -var LabelscoreVoorlopig fieldMapIdx - -var LabelscoreVoorlopigItems fieldItemsMap - -var LabelscoreDefinitiefTracker uint16 -var LabelscoreDefinitiefIdxMap fieldIdxMap -var LabelscoreDefinitief fieldMapIdx - -var LabelscoreDefinitiefItems fieldItemsMap - -var GemeentecodeTracker uint16 -var GemeentecodeIdxMap fieldIdxMap -var Gemeentecode fieldMapIdx - -var GemeentecodeItems fieldItemsMap - -var GemeentenaamTracker uint16 -var GemeentenaamIdxMap fieldIdxMap -var Gemeentenaam fieldMapIdx - -var BuurtcodeTracker uint16 -var BuurtcodeIdxMap fieldIdxMap -var Buurtcode fieldMapIdx - -var BuurtcodeItems fieldItemsMap - -var BuurtnaamTracker uint16 -var BuurtnaamIdxMap fieldIdxMap -var Buurtnaam fieldMapIdx - -var WijkcodeTracker uint16 -var WijkcodeIdxMap fieldIdxMap -var Wijkcode fieldMapIdx - -var WijkcodeItems fieldItemsMap - -var WijknaamTracker uint16 -var WijknaamIdxMap fieldIdxMap -var Wijknaam fieldMapIdx - -var ProvinciecodeTracker uint16 -var ProvinciecodeIdxMap fieldIdxMap -var Provinciecode fieldMapIdx - -var ProvinciecodeItems fieldItemsMap - -var ProvincienaamTracker uint16 -var ProvincienaamIdxMap fieldIdxMap -var Provincienaam fieldMapIdx - -var PandGasEanAansluitingenTracker 
uint16 -var PandGasEanAansluitingenIdxMap fieldIdxMap -var PandGasEanAansluitingen fieldMapIdx - -var P6GasAansluitingen2020Tracker uint16 -var P6GasAansluitingen2020IdxMap fieldIdxMap -var P6GasAansluitingen2020 fieldMapIdx - -var P6Gasm32020Tracker uint16 -var P6Gasm32020IdxMap fieldIdxMap -var P6Gasm32020 fieldMapIdx - -var P6Kwh2020Tracker uint16 -var P6Kwh2020IdxMap fieldIdxMap -var P6Kwh2020 fieldMapIdx - -var PandBouwjaarTracker uint16 -var PandBouwjaarIdxMap fieldIdxMap -var PandBouwjaar fieldMapIdx - -var PandGasAansluitingenTracker uint16 -var PandGasAansluitingenIdxMap fieldIdxMap -var PandGasAansluitingen fieldMapIdx - -var GebruiksdoelenTracker uint16 -var GebruiksdoelenIdxMap fieldIdxMap -var Gebruiksdoelen fieldMapIdx - -/* -var {columnname}Tracker uint16 -var {columnname}IdxMap fieldIdxMap -var {columnname} fieldMapIdx -var {columnname}Items fieldItemmap -*/ - -// item map lock -var lock = sync.RWMutex{} - -// bitArray Lock -var balock = sync.RWMutex{} - func init() { - - WoningTypeTracker = 0 - WoningTypeIdxMap = make(fieldIdxMap) - WoningType = make(fieldMapIdx) - - WoningTypeItems = make(fieldItemsMap) - - LabelscoreVoorlopigTracker = 0 - LabelscoreVoorlopigIdxMap = make(fieldIdxMap) - LabelscoreVoorlopig = make(fieldMapIdx) - - LabelscoreVoorlopigItems = make(fieldItemsMap) - - LabelscoreDefinitiefTracker = 0 - LabelscoreDefinitiefIdxMap = make(fieldIdxMap) - LabelscoreDefinitief = make(fieldMapIdx) - - LabelscoreDefinitiefItems = make(fieldItemsMap) - - GemeentecodeTracker = 0 - GemeentecodeIdxMap = make(fieldIdxMap) - Gemeentecode = make(fieldMapIdx) - - GemeentecodeItems = make(fieldItemsMap) - - GemeentenaamTracker = 0 - GemeentenaamIdxMap = make(fieldIdxMap) - Gemeentenaam = make(fieldMapIdx) - - BuurtcodeTracker = 0 - BuurtcodeIdxMap = make(fieldIdxMap) - Buurtcode = make(fieldMapIdx) - - BuurtcodeItems = make(fieldItemsMap) - - BuurtnaamTracker = 0 - BuurtnaamIdxMap = make(fieldIdxMap) - Buurtnaam = make(fieldMapIdx) - - WijkcodeTracker = 
0 - WijkcodeIdxMap = make(fieldIdxMap) - Wijkcode = make(fieldMapIdx) - - WijkcodeItems = make(fieldItemsMap) - - WijknaamTracker = 0 - WijknaamIdxMap = make(fieldIdxMap) - Wijknaam = make(fieldMapIdx) - - ProvinciecodeTracker = 0 - ProvinciecodeIdxMap = make(fieldIdxMap) - Provinciecode = make(fieldMapIdx) - - ProvinciecodeItems = make(fieldItemsMap) - - ProvincienaamTracker = 0 - ProvincienaamIdxMap = make(fieldIdxMap) - Provincienaam = make(fieldMapIdx) - - PandGasEanAansluitingenTracker = 0 - PandGasEanAansluitingenIdxMap = make(fieldIdxMap) - PandGasEanAansluitingen = make(fieldMapIdx) - - P6GasAansluitingen2020Tracker = 0 - P6GasAansluitingen2020IdxMap = make(fieldIdxMap) - P6GasAansluitingen2020 = make(fieldMapIdx) - - P6Gasm32020Tracker = 0 - P6Gasm32020IdxMap = make(fieldIdxMap) - P6Gasm32020 = make(fieldMapIdx) - - P6Kwh2020Tracker = 0 - P6Kwh2020IdxMap = make(fieldIdxMap) - P6Kwh2020 = make(fieldMapIdx) - - PandBouwjaarTracker = 0 - PandBouwjaarIdxMap = make(fieldIdxMap) - PandBouwjaar = make(fieldMapIdx) - - PandGasAansluitingenTracker = 0 - PandGasAansluitingenIdxMap = make(fieldIdxMap) - PandGasAansluitingen = make(fieldMapIdx) - - GebruiksdoelenTracker = 0 - GebruiksdoelenIdxMap = make(fieldIdxMap) - Gebruiksdoelen = make(fieldMapIdx) - - /* - labelscoredefinitiefTracker = 0 - labelscoredefinitiefIdxMap = make(fieldIdxMap) - labelscoredefinitief = make(fieldMapIdx) - */ + setUpMaps() } type ItemIn struct { @@ -617,12 +420,6 @@ func (i ItemIn) Shrink(label int) Item { // for gast item lookup func (i Item) StoreBitArrayColumns() { - balock.Lock() - defer balock.Unlock() - - lock.RLock() - defer lock.RUnlock() - var ba bitarray.BitArray var ok bool diff --git a/model_maps.go b/model_maps.go new file mode 100644 index 0000000..1999365 --- /dev/null +++ b/model_maps.go @@ -0,0 +1,444 @@ +/* + + When transforming ItemsIn to Items and back again to ItemsOut + + maps are needed to store lookup values. + + those are generated here. 
+ +*/ + +package main + +import ( + "sync" +) + +type ModelMaps struct { + WoningTypeTracker uint16 + WoningTypeIdxMap fieldIdxMap + WoningType fieldMapIdx + + LabelscoreVoorlopigTracker uint16 + LabelscoreVoorlopigIdxMap fieldIdxMap + LabelscoreVoorlopig fieldMapIdx + + // LabelscoreVoorlopigItems fieldItemsMap + + LabelscoreDefinitiefTracker uint16 + LabelscoreDefinitiefIdxMap fieldIdxMap + LabelscoreDefinitief fieldMapIdx + + // LabelscoreDefinitiefItems fieldItemsMap + + GemeentecodeTracker uint16 + GemeentecodeIdxMap fieldIdxMap + Gemeentecode fieldMapIdx + + // GemeentecodeItems fieldItemsMap + + GemeentenaamTracker uint16 + GemeentenaamIdxMap fieldIdxMap + Gemeentenaam fieldMapIdx + + BuurtcodeTracker uint16 + BuurtcodeIdxMap fieldIdxMap + Buurtcode fieldMapIdx + + // BuurtcodeItems fieldItemsMap + + BuurtnaamTracker uint16 + BuurtnaamIdxMap fieldIdxMap + Buurtnaam fieldMapIdx + + WijkcodeTracker uint16 + WijkcodeIdxMap fieldIdxMap + Wijkcode fieldMapIdx + + // WijkcodeItems fieldItemsMap + + WijknaamTracker uint16 + WijknaamIdxMap fieldIdxMap + Wijknaam fieldMapIdx + + ProvinciecodeTracker uint16 + ProvinciecodeIdxMap fieldIdxMap + Provinciecode fieldMapIdx + + // ProvinciecodeItems fieldItemsMap + + ProvincienaamTracker uint16 + ProvincienaamIdxMap fieldIdxMap + Provincienaam fieldMapIdx + + PandGasEanAansluitingenTracker uint16 + PandGasEanAansluitingenIdxMap fieldIdxMap + PandGasEanAansluitingen fieldMapIdx + + P6GasAansluitingen2020Tracker uint16 + P6GasAansluitingen2020IdxMap fieldIdxMap + P6GasAansluitingen2020 fieldMapIdx + + P6Gasm32020Tracker uint16 + P6Gasm32020IdxMap fieldIdxMap + P6Gasm32020 fieldMapIdx + + P6Kwh2020Tracker uint16 + P6Kwh2020IdxMap fieldIdxMap + P6Kwh2020 fieldMapIdx + + PandBouwjaarTracker uint16 + PandBouwjaarIdxMap fieldIdxMap + PandBouwjaar fieldMapIdx + + PandGasAansluitingenTracker uint16 + PandGasAansluitingenIdxMap fieldIdxMap + PandGasAansluitingen fieldMapIdx + + GebruiksdoelenTracker uint16 + GebruiksdoelenIdxMap 
fieldIdxMap + Gebruiksdoelen fieldMapIdx +} + +// Column maps. +// Store for each non distinct/repeated column +// unit16 -> string map and +// string -> unit16 map +// track count of distinct values + +var WoningTypeTracker uint16 +var WoningTypeIdxMap fieldIdxMap +var WoningType fieldMapIdx + +var WoningTypeItems fieldItemsMap + +var LabelscoreVoorlopigTracker uint16 +var LabelscoreVoorlopigIdxMap fieldIdxMap +var LabelscoreVoorlopig fieldMapIdx + +var LabelscoreVoorlopigItems fieldItemsMap + +var LabelscoreDefinitiefTracker uint16 +var LabelscoreDefinitiefIdxMap fieldIdxMap +var LabelscoreDefinitief fieldMapIdx + +var LabelscoreDefinitiefItems fieldItemsMap + +var GemeentecodeTracker uint16 +var GemeentecodeIdxMap fieldIdxMap +var Gemeentecode fieldMapIdx + +var GemeentecodeItems fieldItemsMap + +var GemeentenaamTracker uint16 +var GemeentenaamIdxMap fieldIdxMap +var Gemeentenaam fieldMapIdx + +var BuurtcodeTracker uint16 +var BuurtcodeIdxMap fieldIdxMap +var Buurtcode fieldMapIdx + +var BuurtcodeItems fieldItemsMap + +var BuurtnaamTracker uint16 +var BuurtnaamIdxMap fieldIdxMap +var Buurtnaam fieldMapIdx + +var WijkcodeTracker uint16 +var WijkcodeIdxMap fieldIdxMap +var Wijkcode fieldMapIdx + +var WijkcodeItems fieldItemsMap + +var WijknaamTracker uint16 +var WijknaamIdxMap fieldIdxMap +var Wijknaam fieldMapIdx + +var ProvinciecodeTracker uint16 +var ProvinciecodeIdxMap fieldIdxMap +var Provinciecode fieldMapIdx + +var ProvinciecodeItems fieldItemsMap + +var ProvincienaamTracker uint16 +var ProvincienaamIdxMap fieldIdxMap +var Provincienaam fieldMapIdx + +var PandGasEanAansluitingenTracker uint16 +var PandGasEanAansluitingenIdxMap fieldIdxMap +var PandGasEanAansluitingen fieldMapIdx + +var P6GasAansluitingen2020Tracker uint16 +var P6GasAansluitingen2020IdxMap fieldIdxMap +var P6GasAansluitingen2020 fieldMapIdx + +var P6Gasm32020Tracker uint16 +var P6Gasm32020IdxMap fieldIdxMap +var P6Gasm32020 fieldMapIdx + +var P6Kwh2020Tracker uint16 +var P6Kwh2020IdxMap 
fieldIdxMap +var P6Kwh2020 fieldMapIdx + +var PandBouwjaarTracker uint16 +var PandBouwjaarIdxMap fieldIdxMap +var PandBouwjaar fieldMapIdx + +var PandGasAansluitingenTracker uint16 +var PandGasAansluitingenIdxMap fieldIdxMap +var PandGasAansluitingen fieldMapIdx + +var GebruiksdoelenTracker uint16 +var GebruiksdoelenIdxMap fieldIdxMap +var Gebruiksdoelen fieldMapIdx + +/* +var {columnname}Tracker uint16 +var {columnname}IdxMap fieldIdxMap +var {columnname} fieldMapIdx +var {columnname}Items fieldItemmap +*/ + +// item map lock +var lock = sync.RWMutex{} + +// bitArray Lock +var balock = sync.RWMutex{} + +func initBitarrays() { + + WoningTypeItems = make(fieldItemsMap) + LabelscoreVoorlopigItems = make(fieldItemsMap) + LabelscoreDefinitiefItems = make(fieldItemsMap) + GemeentecodeItems = make(fieldItemsMap) + BuurtcodeItems = make(fieldItemsMap) + WijkcodeItems = make(fieldItemsMap) +} + +func setUpMaps() { + initBitarrays() + WoningTypeTracker = 0 + WoningTypeIdxMap = make(fieldIdxMap) + WoningType = make(fieldMapIdx) + + LabelscoreVoorlopigTracker = 0 + LabelscoreVoorlopigIdxMap = make(fieldIdxMap) + LabelscoreVoorlopig = make(fieldMapIdx) + + LabelscoreDefinitiefTracker = 0 + LabelscoreDefinitiefIdxMap = make(fieldIdxMap) + LabelscoreDefinitief = make(fieldMapIdx) + + GemeentecodeTracker = 0 + GemeentecodeIdxMap = make(fieldIdxMap) + Gemeentecode = make(fieldMapIdx) + + GemeentenaamTracker = 0 + GemeentenaamIdxMap = make(fieldIdxMap) + Gemeentenaam = make(fieldMapIdx) + + BuurtcodeTracker = 0 + BuurtcodeIdxMap = make(fieldIdxMap) + Buurtcode = make(fieldMapIdx) + + BuurtnaamTracker = 0 + BuurtnaamIdxMap = make(fieldIdxMap) + Buurtnaam = make(fieldMapIdx) + + WijkcodeTracker = 0 + WijkcodeIdxMap = make(fieldIdxMap) + Wijkcode = make(fieldMapIdx) + + WijknaamTracker = 0 + WijknaamIdxMap = make(fieldIdxMap) + Wijknaam = make(fieldMapIdx) + + ProvinciecodeTracker = 0 + ProvinciecodeIdxMap = make(fieldIdxMap) + Provinciecode = make(fieldMapIdx) + + ProvinciecodeItems 
= make(fieldItemsMap) + + ProvincienaamTracker = 0 + ProvincienaamIdxMap = make(fieldIdxMap) + Provincienaam = make(fieldMapIdx) + + PandGasEanAansluitingenTracker = 0 + PandGasEanAansluitingenIdxMap = make(fieldIdxMap) + PandGasEanAansluitingen = make(fieldMapIdx) + + P6GasAansluitingen2020Tracker = 0 + P6GasAansluitingen2020IdxMap = make(fieldIdxMap) + P6GasAansluitingen2020 = make(fieldMapIdx) + + P6Gasm32020Tracker = 0 + P6Gasm32020IdxMap = make(fieldIdxMap) + P6Gasm32020 = make(fieldMapIdx) + + P6Kwh2020Tracker = 0 + P6Kwh2020IdxMap = make(fieldIdxMap) + P6Kwh2020 = make(fieldMapIdx) + + PandBouwjaarTracker = 0 + PandBouwjaarIdxMap = make(fieldIdxMap) + PandBouwjaar = make(fieldMapIdx) + + PandGasAansluitingenTracker = 0 + PandGasAansluitingenIdxMap = make(fieldIdxMap) + PandGasAansluitingen = make(fieldMapIdx) + + GebruiksdoelenTracker = 0 + GebruiksdoelenIdxMap = make(fieldIdxMap) + Gebruiksdoelen = make(fieldMapIdx) +} + +func CreateMapstore() ModelMaps { + return ModelMaps{ + WoningTypeTracker, + WoningTypeIdxMap, + WoningType, + + LabelscoreVoorlopigTracker, + LabelscoreVoorlopigIdxMap, + LabelscoreVoorlopig, + + LabelscoreDefinitiefTracker, + LabelscoreDefinitiefIdxMap, + LabelscoreDefinitief, + + GemeentecodeTracker, + GemeentecodeIdxMap, + Gemeentecode, + + GemeentenaamTracker, + GemeentenaamIdxMap, + Gemeentenaam, + + BuurtcodeTracker, + BuurtcodeIdxMap, + Buurtcode, + + BuurtnaamTracker, + BuurtnaamIdxMap, + Buurtnaam, + + WijkcodeTracker, + WijkcodeIdxMap, + Wijkcode, + + WijknaamTracker, + WijknaamIdxMap, + Wijknaam, + + ProvinciecodeTracker, + ProvinciecodeIdxMap, + Provinciecode, + + ProvincienaamTracker, + ProvincienaamIdxMap, + Provincienaam, + + PandGasEanAansluitingenTracker, + PandGasEanAansluitingenIdxMap, + PandGasEanAansluitingen, + + P6GasAansluitingen2020Tracker, + P6GasAansluitingen2020IdxMap, + P6GasAansluitingen2020, + + P6Gasm32020Tracker, + P6Gasm32020IdxMap, + P6Gasm32020, + + P6Kwh2020Tracker, + P6Kwh2020IdxMap, + P6Kwh2020, + + 
PandBouwjaarTracker, + PandBouwjaarIdxMap, + PandBouwjaar, + + PandGasAansluitingenTracker, + PandGasAansluitingenIdxMap, + PandGasAansluitingen, + + GebruiksdoelenTracker, + GebruiksdoelenIdxMap, + Gebruiksdoelen, + } +} + +func LoadMapstore(m ModelMaps) { + + WoningTypeTracker = m.WoningTypeTracker + WoningTypeIdxMap = m.WoningTypeIdxMap + WoningType = m.WoningType + + LabelscoreVoorlopigTracker = m.LabelscoreVoorlopigTracker + LabelscoreVoorlopigIdxMap = m.LabelscoreVoorlopigIdxMap + LabelscoreVoorlopig = m.LabelscoreVoorlopig + + LabelscoreDefinitiefTracker = m.LabelscoreDefinitiefTracker + LabelscoreDefinitiefIdxMap = m.LabelscoreDefinitiefIdxMap + LabelscoreDefinitief = m.LabelscoreDefinitief + + GemeentecodeTracker = m.GemeentecodeTracker + GemeentecodeIdxMap = m.GemeentecodeIdxMap + Gemeentecode = m.Gemeentecode + + GemeentenaamTracker = m.GemeentenaamTracker + GemeentenaamIdxMap = m.GemeentenaamIdxMap + Gemeentenaam = m.Gemeentenaam + + BuurtcodeTracker = m.BuurtcodeTracker + BuurtcodeIdxMap = m.BuurtcodeIdxMap + Buurtcode = m.Buurtcode + + BuurtnaamTracker = m.BuurtnaamTracker + BuurtnaamIdxMap = m.BuurtnaamIdxMap + Buurtnaam = m.Buurtnaam + + WijkcodeTracker = m.WijkcodeTracker + WijkcodeIdxMap = m.WijkcodeIdxMap + Wijkcode = m.Wijkcode + + WijknaamTracker = m.WijknaamTracker + WijknaamIdxMap = m.WijknaamIdxMap + Wijknaam = m.Wijknaam + + ProvinciecodeTracker = m.ProvinciecodeTracker + ProvinciecodeIdxMap = m.ProvinciecodeIdxMap + Provinciecode = m.Provinciecode + + ProvincienaamTracker = m.ProvincienaamTracker + ProvincienaamIdxMap = m.ProvincienaamIdxMap + Provincienaam = m.Provincienaam + + PandGasEanAansluitingenTracker = m.PandGasEanAansluitingenTracker + PandGasEanAansluitingenIdxMap = m.PandGasEanAansluitingenIdxMap + PandGasEanAansluitingen = m.PandGasEanAansluitingen + + P6GasAansluitingen2020Tracker = m.P6GasAansluitingen2020Tracker + P6GasAansluitingen2020IdxMap = m.P6GasAansluitingen2020IdxMap + P6GasAansluitingen2020 = 
m.P6GasAansluitingen2020 + + P6Gasm32020Tracker = m.P6Gasm32020Tracker + P6Gasm32020IdxMap = m.P6Gasm32020IdxMap + P6Gasm32020 = m.P6Gasm32020 + + P6Kwh2020Tracker = m.P6Kwh2020Tracker + P6Kwh2020IdxMap = m.P6Kwh2020IdxMap + P6Kwh2020 = m.P6Kwh2020 + + PandBouwjaarTracker = m.PandBouwjaarTracker + PandBouwjaarIdxMap = m.PandBouwjaarIdxMap + PandBouwjaar = m.PandBouwjaar + + PandGasAansluitingenTracker = m.PandGasAansluitingenTracker + PandGasAansluitingenIdxMap = m.PandGasEanAansluitingenIdxMap + PandGasAansluitingen = m.PandGasAansluitingen + + GebruiksdoelenTracker = m.GebruiksdoelenTracker + GebruiksdoelenIdxMap = m.GebruiksdoelenIdxMap + Gebruiksdoelen = m.Gebruiksdoelen +} diff --git a/storage_operations.go b/storage_operations.go index 0a84c2c..46728dd 100644 --- a/storage_operations.go +++ b/storage_operations.go @@ -4,7 +4,7 @@ import ( "bytes" "compress/gzip" "encoding/gob" - "encoding/json" + // "encoding/json" "fmt" "io/ioutil" "log" @@ -12,6 +12,17 @@ import ( "time" ) +type Store struct { + Items Items + Maps ModelMaps +} + +type storageFunc func(string) (int64, error) +type retrieveFunc func(string) (int, error) + +type storageFuncs map[string]storageFunc +type retrieveFuncs map[string]retrieveFunc + var STORAGEFUNCS storageFuncs var RETRIEVEFUNCS retrieveFuncs @@ -19,16 +30,17 @@ func init() { STORAGEFUNCS = make(storageFuncs) STORAGEFUNCS["bytes"] = saveAsBytes // currently default STORAGEFUNCS["bytesz"] = saveAsBytesCompressed - STORAGEFUNCS["json"] = saveAsJsonZipped - STORAGEFUNCS["jsonz"] = saveAsJsonZipped + // STORAGEFUNCS["json"] = saveAsJsonZipped + // STORAGEFUNCS["jsonz"] = saveAsJsonZipped RETRIEVEFUNCS = make(retrieveFuncs) RETRIEVEFUNCS["bytes"] = loadAsBytes // currently default RETRIEVEFUNCS["bytesz"] = loadAsBytesCompressed - RETRIEVEFUNCS["json"] = loadAsJsonZipped - RETRIEVEFUNCS["jsonz"] = loadAsJsonZipped + // RETRIEVEFUNCS["json"] = loadAsJsonZipped + // RETRIEVEFUNCS["jsonz"] = loadAsJsonZipped } +/* func saveAsJsonZipped(items 
Items, filename string) (int64, error) { var b bytes.Buffer writer := gzip.NewWriter(&b) @@ -48,9 +60,22 @@ func saveAsJsonZipped(items Items, filename string) (int64, error) { size := fi.Size() return size, nil } +*/ -func saveAsBytes(items Items, filename string) (int64, error) { - data := EncodeItems(items) +func makeStore() Store { + return Store{ITEMS, CreateMapstore()} +} + +func restoreStore(store Store) { + ITEMS = store.Items + LoadMapstore(store.Maps) + // rebuild indexes + ITEMS.FillIndexes() +} + +func saveAsBytes(filename string) (int64, error) { + store := makeStore() + data := EncodeItems(store) WriteToFile(data, filename) fi, err := os.Stat(filename) if err != nil { @@ -61,8 +86,9 @@ func saveAsBytes(items Items, filename string) (int64, error) { return size, nil } -func saveAsBytesCompressed(items Items, filename string) (int64, error) { - data := EncodeItems(items) +func saveAsBytesCompressed(filename string) (int64, error) { + store := makeStore() + data := EncodeItems(store) data = Compress(data) WriteToFile(data, filename) fi, err := os.Stat(filename) @@ -74,10 +100,10 @@ func saveAsBytesCompressed(items Items, filename string) (int64, error) { return size, nil } -func EncodeItems(items Items) []byte { +func EncodeItems(s Store) []byte { buf := bytes.Buffer{} enc := gob.NewEncoder(&buf) - err := enc.Encode(items) + err := enc.Encode(s) if err != nil { fmt.Println("error encoding", err) } @@ -103,14 +129,14 @@ func Decompress(s []byte) []byte { return data } -func DecodeToItems(s []byte) Items { - items := make(Items, 0, 100*1000) +func DecodeToStore(s []byte) Store { + store := Store{} decoder := gob.NewDecoder(bytes.NewReader(s)) - err := decoder.Decode(&items) + err := decoder.Decode(&store) if err != nil { - fmt.Println("Unable to DecodeToItems", err) + fmt.Println("Unable to Decode", err) } - return items + return store } func WriteToFile(s []byte, filename string) { @@ -133,22 +159,22 @@ func ReadFromFile(filename string) []byte { return 
data } -func loadAsBytes(items Items, filename string) (int, error) { +func loadAsBytes(filename string) (int, error) { d := ReadFromFile(filename) - items = DecodeToItems(d) - ITEMS = items - return len(items), nil + store := DecodeToStore(d) + restoreStore(store) + return len(ITEMS), nil } -func loadAsBytesCompressed(items Items, filename string) (int, error) { +func loadAsBytesCompressed(filename string) (int, error) { d := ReadFromFile(filename) d = Decompress(d) - items = DecodeToItems(d) - ITEMS = make(Items, 0, 100*1000) - ITEMS = items - return len(items), nil + store := DecodeToStore(d) + restoreStore(store) + return len(ITEMS), nil } +/* func loadAsJsonZipped(items Items, filename string) (int, error) { fi, err := os.Open(filename) if err != nil { @@ -180,6 +206,7 @@ func loadAsJsonZipped(items Items, filename string) (int, error) { s = nil return len(ITEMS), nil } +*/ func loadAtStart(storagename string, filename string, indexed bool) { @@ -195,7 +222,7 @@ func loadAtStart(storagename string, filename string, indexed bool) { fmt.Printf(WarningColorN, msg) start := time.Now() - itemsAdded, err := retrievefunc(ITEMS, filename) + itemsAdded, err := retrievefunc(filename) if err != nil { log.Fatal(fmt.Sprintf("could not open %s reason %s", filename, err)) } @@ -204,13 +231,17 @@ func loadAtStart(storagename string, filename string, indexed bool) { msg = fmt.Sprint("Loaded in memory amount: ", itemsAdded, " time: ", diff) fmt.Printf(WarningColorN, msg) - if indexed { - start = time.Now() - msg := fmt.Sprint("Creating index") - fmt.Printf(WarningColorN, msg) - makeIndex() - diff = time.Since(start) - msg = fmt.Sprint("Index set time: ", diff) - fmt.Printf(WarningColorN, msg) - } + ITEMS.FillIndexes() + + /* + if indexed { + start = time.Now() + msg := fmt.Sprint("Creating index") + fmt.Printf(WarningColorN, msg) + makeIndex() + diff = time.Since(start) + msg = fmt.Sprint("Index set time: ", diff) + fmt.Printf(WarningColorN, msg) + } + */ } diff --git 
a/store.go b/store.go index 0178701..6aed56f 100644 --- a/store.go +++ b/store.go @@ -1,7 +1,9 @@ package main import ( + "fmt" "log" + "time" ) //Items @@ -38,3 +40,20 @@ func ItemChanWorker(itemChan ItemsChannel) { } } } + +func (items Items) FillIndexes() { + + start := time.Now() + + clearGeoIndex() + initBitarrays() + + for i := range items { + ITEMS[i].StoreBitArrayColumns() + ITEMS[i].GeoIndex(ITEMS[i].Label) + } + + diff := time.Since(start) + msg := fmt.Sprint("Index set time: ", diff) + fmt.Printf(WarningColorN, msg) +} From a7fc206cbaae306925db25087689046370794887 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 20 Apr 2021 14:53:37 +0200 Subject: [PATCH 43/54] first working tests --- geo.go | 2 + http_handlers_test.go | 72 ++++++++++++++++++ main.go | 58 +++++++++----- operations.go | 1 + ...aselectie_vbo_energie_20210217.head.csv.gz | Bin 0 -> 698 bytes 5 files changed, 115 insertions(+), 18 deletions(-) create mode 100644 http_handlers_test.go create mode 100644 testdata/dataselectie_vbo_energie_20210217.head.csv.gz diff --git a/geo.go b/geo.go index e11d400..6b51e07 100644 --- a/geo.go +++ b/geo.go @@ -64,6 +64,8 @@ func BuildGeoIndex() { for i, v := range ITEMS { v.GeoIndex(i) } + + defer S2CELLS.Sort() } func (c cellIndexNode) IsEmpty() bool { diff --git a/http_handlers_test.go b/http_handlers_test.go new file mode 100644 index 0000000..c737ef9 --- /dev/null +++ b/http_handlers_test.go @@ -0,0 +1,72 @@ +/* + +# test some basic request handling. 
+ + - typeahead: http://%s/list/?typeahead=ams&limit=10", host), + - search: http://%s/list/?search=ams&page=1&pagesize=1", host), + - search with limit: http://%s/list/?search=10&page=1&pagesize=10&limit=5", host), + - sorting: http://%s/list/?search=100&page=10&pagesize=100&sortby=-country", host), + - filtering: http://%s/list/?search=10&ontains=144&contains-case=10&page=1&pagesize=1", host), + - groupby: http://%s/list/?search=10&contains-case=10&groupby=country", host), + - aggregation: http://%s/list/?search=10&contains-case=10&groupby=country&reduce=count", host), + - chain the same filters: http://%s/list/?search=10&contains-case=127&contains-case=0&contains-case=1", host), + - typeahead use the name of the column in this case IP: http://%s/typeahead/ip/?starts-with=127&limit=15", host), + + +*/ +package main + +import ( + "fmt" + // "io" + // "net/http" + "net/http/httptest" + "testing" +) + +/* load some data 19 records*/ +func TestMain(m *testing.M) { + + SETTINGS.Set( + "csv", "./testdata/dataselectie_vbo_energie_20210217.head.csv.gz", + "test dataset") + + SETTINGS.Set("channelwait", "0.001s", "timeout for channel loading") + itemChan := make(ItemsChannel, 1) + loadcsv(itemChan) + close(itemChan) + ItemChanWorker(itemChan) + // Run the test + m.Run() +} + +func TestCsvLoading(t *testing.T) { + + fmt.Println(len(ITEMS)) + size := len(ITEMS) + if size != 19 { + t.Errorf("expected 19 ITEMS got %d", size) + } +} + +func TestBasicHandlers(t *testing.T) { + + handler := setupHandler() + + urls := []string{ + "/list/", + "/typeahead/pid/?search=1", + "/help/", + } + + for i := range urls { + req := httptest.NewRequest("GET", urls[i], nil) + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + resp := w.Result() + if resp.StatusCode != 200 { + t.Errorf("request to %s failed", urls[i]) + t.Error(resp) + } + } +} diff --git a/main.go b/main.go index 45f6488..32055ad 100644 --- a/main.go +++ b/main.go @@ -46,16 +46,16 @@ func loadcsv(itemChan ItemsChannel) { 
// make sure channels are empty // add timeout there is no garantee ItemsChannel // is empty and you miss a few records - time.Sleep(5 * time.Second) - S2CELLS.Sort() - fmt.Println("Sorted") + timeout, _ := time.ParseDuration(SETTINGS.Get("channelwait")) + time.Sleep(timeout) + // S2CELLS.Sort() + fmt.Println("csv imported") cacheLock.Lock() defer cacheLock.Unlock() GroupByBodyCache = make(map[string]GroupByResult) GroupByHeaderCache = make(map[string]HeaderData) - // makeIndex() } func main() { @@ -84,15 +84,10 @@ func main() { SETTINGS.Set("groupbycache", "yes", "use in memory cache") + SETTINGS.Set("channelwait", "5s", "timeout") + SETTINGS.Parse() - Operations = GroupedOperations{ - Funcs: RegisterFuncMap, - GroupBy: RegisterGroupBy, - Getters: RegisterGetters, - Reduce: RegisterReduce, - BitArrays: RegisterBitArray, - } itemChan := make(ItemsChannel, 1000) go ItemChanWorker(itemChan) @@ -109,18 +104,38 @@ func main() { fmt.Println("start loading") go loadAtStart(SETTINGS.Get("STORAGEMETHOD"), FILENAME, SETTINGS.Get("indexed") == "y") } + + ipPort := SETTINGS.Get("http_db_host") + + mux := setupHandler() + + msg := fmt.Sprint( + "starting server\nhost: ", + ipPort, + ) + fmt.Printf(InfoColorN, msg) + log.Fatal(http.ListenAndServe(ipPort, mux)) +} + +func setupHandler() http.Handler { + JWTConfig := jwtConfig{ Enabled: SETTINGS.Get("JWTENABLED") == "yes", SharedSecret: SETTINGS.Get("SHAREDSECRET"), } - listRest := contextListRest(JWTConfig, itemChan, Operations) - addRest := contextAddRest(JWTConfig, itemChan, Operations) + Operations = GroupedOperations{ + Funcs: RegisterFuncMap, + GroupBy: RegisterGroupBy, + Getters: RegisterGetters, + Reduce: RegisterReduce, + BitArrays: RegisterBitArray, + } searchRest := contextSearchRest(JWTConfig, itemChan, Operations) typeAheadRest := contextTypeAheadRest(JWTConfig, itemChan, Operations) - - ipPort := SETTINGS.Get("http_db_host") + listRest := contextListRest(JWTConfig, itemChan, Operations) + addRest := 
contextAddRest(JWTConfig, itemChan, Operations) mux := http.NewServeMux() @@ -142,13 +157,20 @@ func main() { if SETTINGS.Get("prometheus-monitoring") == "y" { mux.Handle("/metrics", promhttp.Handler()) } + fmt.Println("indexed: ", SETTINGS.Get("indexed")) cors := SETTINGS.Get("CORS") == "y" - msg := fmt.Sprint("starting server\nhost: ", ipPort, " with:", len(ITEMS), "items ", "management api's: ", SETTINGS.Get("mgmt") == "y", " jwt enabled: ", JWTConfig.Enabled, " monitoring: ", SETTINGS.Get("prometheus-monitoring") == "yes", " CORS: ", cors) + middleware := MIDDLEWARE(cors) + + msg := fmt.Sprint( + "setup http handler:", + " with:", len(ITEMS), "items ", + "management api's: ", SETTINGS.Get("mgmt") == "y", + " jwt enabled: ", JWTConfig.Enabled, " monitoring: ", SETTINGS.Get("prometheus-monitoring") == "yes", " CORS: ", cors) + fmt.Printf(InfoColorN, msg) - middleware := MIDDLEWARE(cors) - log.Fatal(http.ListenAndServe(ipPort, middleware(mux))) + return middleware(mux) } diff --git a/operations.go b/operations.go index 4aada53..d29af2b 100644 --- a/operations.go +++ b/operations.go @@ -420,6 +420,7 @@ func filteredEarlyExitSingle(items *Items, column string, operations GroupedOper single := f(item) filteredItemsSet[single] = true } else { + fmt.Print(operations.Getters) fmt.Println(column) fmt.Println("missing getter?") } diff --git a/testdata/dataselectie_vbo_energie_20210217.head.csv.gz b/testdata/dataselectie_vbo_energie_20210217.head.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..55f9a0672fbc951b4a48904e59eddc3e86a364db GIT binary patch literal 698 zcmV;r0!95FiwFphm409V17u-zVRL0{Wn*+{WnXq;Z(n6@WpZa}WnVHdGBGeRF*hz~ zWnpA4V{>)@<&{ly(=ZT*@BE5PPIN}J`j&ib4^ReZd*I{fQDQZywk2cB$uJE6ot2vu zyFRot9H7*hNwlxz-OZ!bYT3AAiI&^p+xPYG+k`HzLt$YUn#Q)<%9Jr#yWpMQ*l)et zn#%eJk||o--A(23dy^JcW@W2x9a@`jLujkexXn~9?6vbQI(rTJ#?}_x*?rK1>5Zwu zT=ji>$Y&Vu^_{yZ_i`HU)us(w=hyDw?9M*S2R9_%7+>TY)8*Fq+!)_gJ){Hw&Zi98 
z&^Ng&a*8QVi{-F%vTQn6vyolyzK*OIqA^vT0^Z@wYkJ6G75d$cG3_{~Sw42v_O9%T zV1fJc#S$~g2*#LDqDdxGq$J^_;Y{*OvAsm`@Mway1c_imPS2KzD2~X%To5eC5?weO z7qF4mBMhH6AJY5iwCA-Hiu1q{JO=H>!_30y0 zPm0kl1pi1t8F-as4oHsx>3ikFo)aLY1^Ax>5+gv2YsAKc&Y$oBTe%zP$Yp>`ABBie z4DU-dLNrJevvCiCV7~#$=hJ~C(JGra=nRh!*t7%5XVrlQK-nA+83B@b3h4jtKtj)3 z^!yIX=;Vq|cRU|4Qh)n|aoAU&z^EYE9*z=B2(B>G{AlP1_u*hHK!50j-QxoVr1A6$ g Date: Tue, 20 Apr 2021 19:50:47 +0200 Subject: [PATCH 44/54] first geo testing wip --- csv.go | 22 +++++-- geo.go | 6 +- http_handlers_test.go | 60 ++++++++++++++++-- main.go | 10 ++- store.go | 1 + ...aselectie_vbo_energie_20210217.head.csv.gz | Bin 698 -> 708 bytes 6 files changed, 82 insertions(+), 17 deletions(-) diff --git a/csv.go b/csv.go index 43161d1..a4f49f8 100644 --- a/csv.go +++ b/csv.go @@ -22,14 +22,17 @@ func containsDelimiter(col string) bool { // Parse columns from first header row or from flags func parseColumns(reader *csv.Reader, skipHeader bool, fields string) ([]string, error) { + var err error var columns []string + if fields != "" { columns = strings.Split(fields, ",") if skipHeader { - reader.Read() //Force consume one row + reader.Read() // Force consume one row } + } else { columns, err = reader.Read() fmt.Printf("%v columns\n%v\n", len(columns), columns) @@ -37,6 +40,10 @@ func parseColumns(reader *csv.Reader, skipHeader bool, fields string) ([]string, fmt.Printf("FOUND ERR\n") return nil, err } + itemIn := ItemIn{} + if len(columns) != len(itemIn.Columns()) { + panic(errors.New("columns mismatch")) + } } for _, col := range columns { @@ -46,15 +53,12 @@ func parseColumns(reader *csv.Reader, skipHeader bool, fields string) ([]string, } } - //for i, col := range columns { - // columns[i] = postgresify(col) - //} - return columns, nil } func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, delimiter string, nullDelimiter string) (error, int, int) { + success := 0 failed := 0 @@ -64,6 +68,7 @@ func copyCSVRows(itemChan ItemsChannel, reader 
*csv.Reader, ignoreErrors bool, itemIn := ItemIn{} columns := itemIn.Columns() cols := make([]interface{}, len(columns)) + record, err := reader.Read() if err == io.EOF { @@ -72,6 +77,7 @@ func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, if err != nil { line := strings.Join(record, delimiter) + failed++ if ignoreErrors { @@ -96,6 +102,7 @@ func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, // marschall it to bytes b, _ := json.Marshal(itemMap) + // fill the new Item instance with values if err := json.Unmarshal([]byte(b), &itemIn); err != nil { line := strings.Join(record, delimiter) @@ -114,6 +121,7 @@ func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, itemChan <- items items = ItemsIn{} } + items = append(items, &itemIn) success++ } @@ -127,7 +135,8 @@ func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, func importCSV(filename string, itemChan ItemsChannel, ignoreErrors bool, skipHeader bool, - delimiter string, nullDelimiter string) error { + delimiter string, nullDelimiter string, +) error { dialect := csv.Dialect{} dialect.Delimiter, _ = utf8.DecodeRuneInString(delimiter) @@ -157,6 +166,7 @@ func importCSV(filename string, itemChan ItemsChannel, var err error _, err = parseColumns(reader, skipHeader, "") + if err != nil { log.Fatal(err) } diff --git a/geo.go b/geo.go index 6b51e07..925e911 100644 --- a/geo.go +++ b/geo.go @@ -62,7 +62,8 @@ func init() { func BuildGeoIndex() { for i, v := range ITEMS { - v.GeoIndex(i) + err := v.GeoIndex(i) + fmt.Println(err) } defer S2CELLS.Sort() @@ -75,9 +76,6 @@ func (c cellIndexNode) IsEmpty() bool { // GeoIndex for each items determine S2Cell and store it. 
func (i Item) GeoIndex(label int) error { - lock.Lock() - defer lock.Unlock() - if i.GetGeometry() == "" { return fmt.Errorf("missing wkt geometry") } diff --git a/http_handlers_test.go b/http_handlers_test.go index c737ef9..7e10a33 100644 --- a/http_handlers_test.go +++ b/http_handlers_test.go @@ -21,21 +21,26 @@ import ( // "io" // "net/http" "net/http/httptest" + "net/url" + "strings" "testing" ) /* load some data 19 records*/ func TestMain(m *testing.M) { + defaultSettings() + SETTINGS.Set( "csv", "./testdata/dataselectie_vbo_energie_20210217.head.csv.gz", "test dataset") - SETTINGS.Set("channelwait", "0.001s", "timeout for channel loading") - itemChan := make(ItemsChannel, 1) + SETTINGS.Set("channelwait", "0.01s", "timeout for channel loading") + loadcsv(itemChan) close(itemChan) ItemChanWorker(itemChan) + // Run the test m.Run() } @@ -43,9 +48,11 @@ func TestMain(m *testing.M) { func TestCsvLoading(t *testing.T) { fmt.Println(len(ITEMS)) + size := len(ITEMS) - if size != 19 { - t.Errorf("expected 19 ITEMS got %d", size) + + if size != 9 { + t.Errorf("expected 9 ITEMS got %d", size) } } @@ -70,3 +77,48 @@ func TestBasicHandlers(t *testing.T) { } } } + +func TestGeoQuery(t *testing.T) { + + BuildGeoIndex() + + if len(S2CELLS) == 0 { + t.Error("geo indexing failed") + } + + data := url.Values{} + data.Set("groupby", "postcode") + data.Set("reduce", "count") + + geojson := fmt.Sprint(` +{ + "type": "Polygon", + "coordinates": [ + [ + [4.902321, 52.428306], + [4.90127, 52.427024], + [4.905281, 52.426069], + [4.906782, 52.426226], + [4.906418, 52.427469], + [4.902321, 52.428306] + ] + ] +} + `) + data.Set("geojson", geojson) + + params := strings.NewReader(data.Encode()) + + handler := setupHandler() + req := httptest.NewRequest("POST", "/list/", params) + req.Header.Add("Content-Type", "application/x-www-form-urlencoded") + + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + + resp := w.Result() + if resp.StatusCode != 201 { + t.Errorf("request to %s 
failed", req.URL) + t.Error(resp) + } +} diff --git a/main.go b/main.go index 32055ad..c1e2072 100644 --- a/main.go +++ b/main.go @@ -30,10 +30,13 @@ const ( DebugColorN = "\033[0;36m%s\033[0m\n" ) -func init() {} +func init() { + itemChan = make(ItemsChannel, 1000) +} func loadcsv(itemChan ItemsChannel) { log.Print("loading given csv") + fmt.Println(SETTINGS.Get("delimiter")) err := importCSV(SETTINGS.Get("csv"), itemChan, false, true, SETTINGS.Get("delimiter"), @@ -58,7 +61,7 @@ func loadcsv(itemChan ItemsChannel) { GroupByHeaderCache = make(map[string]HeaderData) } -func main() { +func defaultSettings() { SETTINGS.Set("http_db_host", "0.0.0.0:8000", "host with port") SETTINGS.Set("SHAREDSECRET", "", "jwt shared secret") SETTINGS.Set("JWTENABLED", "y", "JWT enabled") @@ -87,8 +90,9 @@ func main() { SETTINGS.Set("channelwait", "5s", "timeout") SETTINGS.Parse() +} - itemChan := make(ItemsChannel, 1000) +func main() { go ItemChanWorker(itemChan) diff --git a/store.go b/store.go index 6aed56f..1f8fef5 100644 --- a/store.go +++ b/store.go @@ -15,6 +15,7 @@ type ItemsGroupedBy map[string]Items type ItemsChannel chan ItemsIn var ITEMS Items + var itemChan ItemsChannel func init() { diff --git a/testdata/dataselectie_vbo_energie_20210217.head.csv.gz b/testdata/dataselectie_vbo_energie_20210217.head.csv.gz index 55f9a0672fbc951b4a48904e59eddc3e86a364db..1bd72087e3a82385e314b1bfe24c6490743ccbbb 100644 GIT binary patch delta 665 zcmV;K0%rZX1;hmhABzYGixYp52RVO?Pjp7K`m!V++XIvV+8+2gdX!iVs%^>Gaxx6V ze`n<;#jX!2!vWelGl}+7}W?LMDjywkV#rrgPKxKo?fZ>(F{y|X)eKkx03c%xmBuXUFj?Q*SMSM`t% z{5uyjXno)0w#X@_I4%~$(#f*vT+K#yx&1n_f)83(c?@`mGq34Bho$egH(IyjoM!o$ zW!u}bE4%^j$%_Ot!3f5fP@;cGCSs&0;i%zE@JzCuMDg$_f|US?U_wsM5=10NWN$7A zPDz3;tO*O)NaGNO&zleNeRNuP!L)_0(b=aV2FDm(`a$$%XKV#oqc87n=c1E-TkEzA zC+~F^^xB}S&s3rdsDt9wxv%2##qEQxF5g~!T)gDR8BQsq87CN4g}LN1 z6_k=AB!;9gB0S?5362#aN^#JCl-dPwf8DzkRMgJ{c%9iY3XG;C+rg13OH(c}Q~YS?2>0P&oPvJn zgx%u<1*Guw3gd(`GCpA}dLPqC|Bnj#3rK%ZK?5Orm=Jx!gnkJD3**u<{tN&BFa|}K delta 
654 zcmV;90&)Gs1-b@~z;3;+Ogu47 zW7-Jy=_684iqS3v|42X?c$H)hNRI&Nd*#EP6CkAp_@4t3BS4I6#KwfqpYQ=&xf|%n zWq?c{g@{lL?@Kj8G)R6FvvCiCV7~#$=hJ~C(JGra=nRh!*t7%5XVrlQK-nA+83B@b z3h4jtKtj)3^!yIX=;Vq|cRU|4Qh)n|aoAU&z^EYE9*z=B2(B>G{AlP1_u*hHK!50j o-QxoVr1A6$ Date: Wed, 21 Apr 2021 18:07:32 +0200 Subject: [PATCH 45/54] working geojson tests, removed some code duplication --- geo.go | 48 +++++++++++++++------------- http_handlers.go | 4 +-- http_handlers_test.go | 73 +++++++++++++++++++++++++++++++++--------- operations.go | 74 ++++++++++++------------------------------- 4 files changed, 107 insertions(+), 92 deletions(-) diff --git a/geo.go b/geo.go index 925e911..2591eef 100644 --- a/geo.go +++ b/geo.go @@ -21,6 +21,7 @@ import ( "github.com/go-spatial/geom" "github.com/go-spatial/geom/encoding/wkt" "github.com/golang/geo/s2" + "log" "sort" "strings" "sync" @@ -63,7 +64,9 @@ func init() { func BuildGeoIndex() { for i, v := range ITEMS { err := v.GeoIndex(i) - fmt.Println(err) + if err != nil { + log.Println(err) + } } defer S2CELLS.Sort() @@ -125,48 +128,49 @@ func (i Item) GeoIndex(label int) error { } +type MatchedItems map[int]bool + +// from map to array remove duplicate matches +func matchesToArray(items *Items, matched MatchedItems) Items { + newItems := make(Items, 0) + for k := range matched { + newItems = append(newItems, (*items)[k]) + } + + return newItems +} + // Simple search algo func SearchOverlapItems(items *Items, cu s2.CellUnion) Items { - s2Lock.RLock() - defer s2Lock.RUnlock() - - newItems := Items{} + matchedItems := make(MatchedItems) - for k, i := range *items { - if cu.ContainsCellID(S2CELLMAP[k]) { - newItems[k] = i + for i := range *items { + l := (*items)[i].Label + if cu.ContainsCellID(S2CELLMAP[l]) { + matchedItems[l] = true } } - return newItems + + return matchesToArray(items, matchedItems) } // Given only a cell Union return Items func SearchGeoItems(cu s2.CellUnion) Items { - newItems := Items{} + matchedItems := make(map[int]bool) cu.Normalize() 
- //for i, c := range cu { - // fmt.Printf("%d %s \n", i, c) - //} - min := S2CELLS.Seek(cu[0].ChildBegin()) max := S2CELLS.Seek(cu[len(cu)-1].ChildEnd()) - // ITEMS read lock - lock.RLock() - defer lock.RUnlock() - for _, i := range S2CELLS[min : max+1] { if cu.ContainsCellID(i.ID) { - newItems = append(newItems, ITEMS[i.Label]) + matchedItems[i.Label] = true } } - - return newItems - + return matchesToArray(&ITEMS, matchedItems) } // Seek position in index which is close to target diff --git a/http_handlers.go b/http_handlers.go index e960a1b..a164111 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -59,10 +59,10 @@ func isCached(w http.ResponseWriter, r *http.Request, query Query) bool { cacheKey, err := query.CacheKey() if err == nil && len(query.GroupBy) > 0 && len(query.Reduce) > 0 { - cacheLock.Lock() + cacheLock.RLock() groupByResult, found := GroupByBodyCache[cacheKey] headerCache, _ := GroupByHeaderCache[cacheKey] - cacheLock.Unlock() + cacheLock.RUnlock() if found { w.Header().Set("Content-Type", "application/json") diff --git a/http_handlers_test.go b/http_handlers_test.go index 7e10a33..d831a2f 100644 --- a/http_handlers_test.go +++ b/http_handlers_test.go @@ -17,15 +17,18 @@ package main import ( + "encoding/json" "fmt" // "io" - // "net/http" + "net/http" "net/http/httptest" "net/url" "strings" "testing" ) +var handler http.Handler + /* load some data 19 records*/ func TestMain(m *testing.M) { @@ -41,6 +44,8 @@ func TestMain(m *testing.M) { close(itemChan) ItemChanWorker(itemChan) + handler = setupHandler() + // Run the test m.Run() } @@ -51,19 +56,21 @@ func TestCsvLoading(t *testing.T) { size := len(ITEMS) - if size != 9 { - t.Errorf("expected 9 ITEMS got %d", size) + if size != 10 { + t.Errorf("expected 10 ITEMS got %d", size) } } func TestBasicHandlers(t *testing.T) { - handler := setupHandler() + if len(ITEMS) < 10 { + t.Error("no items") + } urls := []string{ "/list/", - "/typeahead/pid/?search=1", - "/help/", + 
//"/typeahead/pid/?search=1", + //"/help/", } for i := range urls { @@ -78,14 +85,23 @@ func TestBasicHandlers(t *testing.T) { } } +// Test geojson queries combined with groupby and reduce. func TestGeoQuery(t *testing.T) { BuildGeoIndex() + if len(ITEMS) < 10 { + t.Error("no items") + } + if len(S2CELLS) == 0 { t.Error("geo indexing failed") } + if len(S2CELLMAP) == 0 { + t.Error("geo indexing failed") + } + data := url.Values{} data.Set("groupby", "postcode") data.Set("reduce", "count") @@ -95,12 +111,11 @@ func TestGeoQuery(t *testing.T) { "type": "Polygon", "coordinates": [ [ - [4.902321, 52.428306], - [4.90127, 52.427024], - [4.905281, 52.426069], - [4.906782, 52.426226], - [4.906418, 52.427469], - [4.902321, 52.428306] + [4.905321, 52.377706], + [4.90527, 52.377706], + [4.90527, 52.377869], + [4.905321, 52.377869], + [4.905321, 52.377706] ] ] } @@ -109,16 +124,44 @@ func TestGeoQuery(t *testing.T) { params := strings.NewReader(data.Encode()) - handler := setupHandler() req := httptest.NewRequest("POST", "/list/", params) req.Header.Add("Content-Type", "application/x-www-form-urlencoded") w := httptest.NewRecorder() + handler.ServeHTTP(w, req) resp := w.Result() - if resp.StatusCode != 201 { - t.Errorf("request to %s failed", req.URL) + if resp.StatusCode != 200 { + t.Errorf("geo request to %s failed statuscode", req.URL) t.Error(resp) } + + headerQuery := resp.Header.Get("Query") + query := Query{} + json.Unmarshal([]byte(headerQuery), &query) + + if query.GeometryGiven != true { + t.Errorf("geo request to %s failed ", req.URL) + t.Error(resp.Header.Get("Query")) + // t.Error(resp.Header.Get("GeometryGiven")) + t.Error(resp.Body) + } + + if resp.Header.Get("Total-Items") != "7" { + t.Error("geo request count is not 7") + } + + // parse json GroupBy response + defer resp.Body.Close() + j := GroupByResult{} + err := json.NewDecoder(resp.Body).Decode(&j) + + if err != nil { + t.Error(err) + } + + if j["1011AB"]["count"] != "7" { + t.Error("geo request json 
response count is not 7") + } } diff --git a/operations.go b/operations.go index d29af2b..6d6a4a5 100644 --- a/operations.go +++ b/operations.go @@ -118,17 +118,6 @@ func parseURLParameters(r *http.Request) (Query, error) { groupBy := "" reduce := "" - //TODO change query to be based on input. - - // parse params and body posts // (geo)json data - r.ParseForm() - - if SETTINGS.Get("debug") == "yes" { - for key, value := range r.Form { - fmt.Printf("F %s = %s\n", key, value) - } - } - // parse params and body posts // (geo)json data r.ParseForm() @@ -221,8 +210,8 @@ func parseURLParameters(r *http.Request) (Query, error) { if geometryGiven && geometryS[0] != "" { err := json.Unmarshal([]byte(geometryS[0]), &geoinput) if err != nil { - fmt.Println("parsing geojson error") - fmt.Println(err) + log.Println("parsing geojson error") + log.Println(err) geometryGiven = false return Query{}, errors.New("failed to parse geojson") } @@ -479,7 +468,7 @@ func bitArrayFilter( if len(combinedBitArrays) > 0 { bitArrayResult = combinedBitArrays[0] } else { - log.Println("no bitarrays found") + log.Println("no bitarrays found / used") return nil, errors.New("no bitarray found") } @@ -509,16 +498,15 @@ func bitArrayFilter( func runQuery(items *Items, query Query, operations GroupedOperations) (Items, int64) { start := time.Now() - var newItems Items if query.GeometryGiven { cu := CoverDefault(query.Geometry) if len(cu) == 0 { - fmt.Println("covering cell union not created") + log.Println("covering cell union not created") } else { geoitems := SearchGeoItems(cu) items = &geoitems - fmt.Println(len(geoitems)) + // log.Printf("geo matched %d \n", len(geoitems)) } } @@ -544,22 +532,7 @@ func runQuery(items *Items, query Query, operations GroupedOperations) (Items, i } } - if query.EarlyExit() { - newItems = filteredEarlyExit(nextItems, operations, query) - } else { - newItems = filtered(nextItems, operations, query) - } - - if query.GeometryGiven { - cu := CoverDefault(query.Geometry) - 
if len(cu) == 0 { - fmt.Println("covering cell union not created") - } else { - geoitems := SearchOverlapItems(items, cu) - items = &geoitems - fmt.Println(len(geoitems)) - } - } + var newItems Items if query.EarlyExit() { newItems = filteredEarlyExit(nextItems, operations, query) @@ -568,6 +541,9 @@ func runQuery(items *Items, query Query, operations GroupedOperations) (Items, i } diff := time.Since(start) + + log.Printf("items matched %d \n", len(newItems)) + return newItems, int64(diff) / int64(1000000) } @@ -616,29 +592,20 @@ func mapIndex(items Items, indexes []int) Items { type HeaderData map[string]string func getHeaderData(items Items, query Query, queryDuration int64) HeaderData { - headerData := make(HeaderData) - - if query.LimitGiven { - headerData["Limit"] = strconv.Itoa(query.Limit) - } - - if query.PageGiven { - headerData["Page"] = strconv.Itoa(query.Page) - headerData["Page-Size"] = strconv.Itoa(query.PageSize) - headerData["Total-Pages"] = strconv.Itoa((len(items) / query.PageSize) + 1) - } - - headerData["Cache-Control"] = "public, max-age=300" - headerData["Total-Items"] = strconv.Itoa(len(items)) - headerData["Query-Duration"] = strconv.FormatInt(queryDuration, 10) + "ms" - bytesQuery, _ := json.Marshal(query) - headerData["query"] = string(bytesQuery) - + matched := int64(len(items)) + headerData := getHeaderDataShared(query, queryDuration, matched) return headerData } //getHeaderDataSlice extract from header information with data slice we want func getHeaderDataSlice(items []string, query Query, queryDuration int64) HeaderData { + matched := int64(len(items)) + headerData := getHeaderDataShared(query, queryDuration, matched) + return headerData +} + +func getHeaderDataShared(query Query, queryDuration int64, matched int64) HeaderData { + headerData := make(HeaderData) if query.LimitGiven { @@ -648,10 +615,11 @@ func getHeaderDataSlice(items []string, query Query, queryDuration int64) Header if query.PageGiven { headerData["Page"] = 
strconv.Itoa(query.Page) headerData["Page-Size"] = strconv.Itoa(query.PageSize) - headerData["Total-Pages"] = strconv.Itoa((len(items) / query.PageSize) + 1) + headerData["Total-Pages"] = strconv.Itoa(int(matched)/query.PageSize + 1) } - headerData["Total-Items"] = strconv.Itoa(len(items)) + headerData["Total-Items"] = strconv.FormatInt(matched, 10) + headerData["Cache-Control"] = "public, max-age=300" headerData["Query-Duration"] = strconv.FormatInt(queryDuration, 10) + "ms" bytesQuery, _ := json.Marshal(query) headerData["query"] = string(bytesQuery) From 76c0d6bf32ad7d43f9dde7ffc5acced8ac4729d8 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Wed, 21 Apr 2021 22:42:45 +0200 Subject: [PATCH 46/54] add storage test, create example requests --- curlgeotest.sh => examples/curlgeotest.sh | 0 curltest.sh => examples/curltest.sh | 0 http_handlers_test.go | 6 ++---- storage_operations.go | 26 +++++++++-------------- 4 files changed, 12 insertions(+), 20 deletions(-) rename curlgeotest.sh => examples/curlgeotest.sh (100%) rename curltest.sh => examples/curltest.sh (100%) diff --git a/curlgeotest.sh b/examples/curlgeotest.sh similarity index 100% rename from curlgeotest.sh rename to examples/curlgeotest.sh diff --git a/curltest.sh b/examples/curltest.sh similarity index 100% rename from curltest.sh rename to examples/curltest.sh diff --git a/http_handlers_test.go b/http_handlers_test.go index d831a2f..c0a42be 100644 --- a/http_handlers_test.go +++ b/http_handlers_test.go @@ -52,8 +52,6 @@ func TestMain(m *testing.M) { func TestCsvLoading(t *testing.T) { - fmt.Println(len(ITEMS)) - size := len(ITEMS) if size != 10 { @@ -69,8 +67,8 @@ func TestBasicHandlers(t *testing.T) { urls := []string{ "/list/", - //"/typeahead/pid/?search=1", - //"/help/", + "/typeahead/pid/?search=1", + "/help/", } for i := range urls { diff --git a/storage_operations.go b/storage_operations.go index 46728dd..c89fd86 100644 --- a/storage_operations.go +++ b/storage_operations.go @@ -4,7 +4,7 @@ 
import ( "bytes" "compress/gzip" "encoding/gob" - // "encoding/json" + "encoding/json" "fmt" "io/ioutil" "log" @@ -30,21 +30,19 @@ func init() { STORAGEFUNCS = make(storageFuncs) STORAGEFUNCS["bytes"] = saveAsBytes // currently default STORAGEFUNCS["bytesz"] = saveAsBytesCompressed - // STORAGEFUNCS["json"] = saveAsJsonZipped - // STORAGEFUNCS["jsonz"] = saveAsJsonZipped + STORAGEFUNCS["json"] = saveAsJsonZipped RETRIEVEFUNCS = make(retrieveFuncs) RETRIEVEFUNCS["bytes"] = loadAsBytes // currently default RETRIEVEFUNCS["bytesz"] = loadAsBytesCompressed - // RETRIEVEFUNCS["json"] = loadAsJsonZipped - // RETRIEVEFUNCS["jsonz"] = loadAsJsonZipped + RETRIEVEFUNCS["json"] = loadAsJsonZipped } -/* -func saveAsJsonZipped(items Items, filename string) (int64, error) { +func saveAsJsonZipped(filename string) (int64, error) { + store := makeStore() var b bytes.Buffer writer := gzip.NewWriter(&b) - itemJSON, _ := json.Marshal(ITEMS) + itemJSON, _ := json.Marshal(store) writer.Write(itemJSON) writer.Flush() writer.Close() @@ -60,7 +58,6 @@ func saveAsJsonZipped(items Items, filename string) (int64, error) { size := fi.Size() return size, nil } -*/ func makeStore() Store { return Store{ITEMS, CreateMapstore()} @@ -174,8 +171,7 @@ func loadAsBytesCompressed(filename string) (int, error) { return len(ITEMS), nil } -/* -func loadAsJsonZipped(items Items, filename string) (int, error) { +func loadAsJsonZipped(filename string) (int, error) { fi, err := os.Open(filename) if err != nil { _, err2 := os.Getwd() @@ -192,21 +188,19 @@ func loadAsJsonZipped(items Items, filename string) (int, error) { } defer fz.Close() - // TODO buffered instead of one big chunk s, err := ioutil.ReadAll(fz) if err != nil { return 0, err } - ITEMS = make(Items, 0, 100*1000) - json.Unmarshal(s, &ITEMS) - + store := makeStore() + json.Unmarshal(s, &store) + restoreStore(store) // GC friendly s = nil return len(ITEMS), nil } -*/ func loadAtStart(storagename string, filename string, indexed bool) { From 
cc247ce673609a79e51d7b7c17402dc1af8891ab Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 27 Apr 2021 01:07:40 +0200 Subject: [PATCH 47/54] wip: rewrite model creation, code, added column.go code --- .gitignore | 3 + column.go | 125 +++ extras/config.yaml | 30 + extras/create_model.py | 121 ++- extras/templates/initColumn.template.jinja2 | 4 +- .../loadRepeatColumn.template.jinja2 | 5 + extras/templates/model.template.jinja2 | 74 +- extras/templates/modelmap.template.jinja2 | 51 + extras/templates/registerFilters.jinja2 | 3 - extras/templates/repeatColumn.template.jinja2 | 5 + extras/templates/shrinkVars.jinja2 | 6 +- geo.go | 8 +- http_handlers.go | 8 +- http_handlers_test.go | 33 +- main.go | 4 +- model.go | 879 ++++-------------- model_maps.go | 455 ++------- operations.go | 40 +- storage_operations.go | 20 +- storage_operations_test.go | 44 + store.go | 12 +- 21 files changed, 684 insertions(+), 1246 deletions(-) create mode 100644 column.go create mode 100644 extras/config.yaml create mode 100644 extras/templates/loadRepeatColumn.template.jinja2 create mode 100644 extras/templates/modelmap.template.jinja2 create mode 100644 extras/templates/repeatColumn.template.jinja2 create mode 100644 storage_operations_test.go diff --git a/.gitignore b/.gitignore index 80abf77..847e8af 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ extras/model.go +testdata/* +*.gz +lambdadb diff --git a/column.go b/column.go new file mode 100644 index 0000000..722dea1 --- /dev/null +++ b/column.go @@ -0,0 +1,125 @@ +package main + +import ( + "errors" + "github.com/Workiva/go-datastructures/bitarray" + "log" +) + +type fieldIdxMap map[string]uint32 +type IdxFieldMap map[uint32]string + +type MappedColumn struct { + Idx fieldIdxMap + Field IdxFieldMap + IdxTracker uint32 +} + +type ColumnRegister map[string]MappedColumn + +var RepeatedColumns ColumnRegister + +func init() { + RepeatedColumns = make(ColumnRegister) +} + +func NewReapeatedColumn(column string) MappedColumn { 
+ m := MappedColumn{ + make(fieldIdxMap), + make(IdxFieldMap), + 0, + } + RepeatedColumns[column] = m + return m +} + +// Store field name as idx value and idx as field value +func (m *MappedColumn) Store(field string) { + + if _, ok := m.Idx[field]; !ok { + m.Idx[field] = m.IdxTracker + m.Field[m.IdxTracker] = field + m.IdxTracker++ + } +} + +// Store Array field (postgres Array). +func (m *MappedColumn) StoreArray(field string) []uint32 { + + fieldsArray := make([]uint32, 0) + + // parsing {a, b} array values + // string should be at least 2 example "{}" == size 2 + if len(field) > 2 { + fields, err := ParsePGArray(field) + + if err != nil { + log.Fatal(err, "error parsing array ") + } + + for _, gd := range fields { + m.Store(gd) + } + + for _, v := range fields { + fieldsArray = append(fieldsArray, Gebruiksdoelen.GetIndex(v)) + } + } + return fieldsArray +} + +func (m *MappedColumn) GetValue(idx uint32) string { + return m.Field[idx] +} + +func (m *MappedColumn) GetIndex(s string) uint32 { + return m.Idx[s] +} + +// SetBitArray WIP +func SetBitArray(column string, i uint32, label int) { + + var ba bitarray.BitArray + var ok bool + + // check if map of bitmaps is present for column + var map_ba fieldBitarrayMap + if _, ok = BitArrays[column]; !ok { + map_ba := make(fieldBitarrayMap) + BitArrays[column] = map_ba + } + + map_ba = BitArrays[column] + + // check for existing bitarray for i value + ba, ok = map_ba[i] + if !ok { + ba = bitarray.NewSparseBitArray() + map_ba[i] = ba + } + // set bit for item label. 
+ ba.SetBit(uint64(label)) +} + +func GetBitArray(column, value string) (bitarray.BitArray, error) { + + var ok bool + + if _, ok = BitArrays[column]; !ok { + return nil, errors.New("no bitarray filter found for column " + column) + } + + bpi, ok := RepeatedColumns[column].Idx[value] + + if !ok { + return nil, errors.New("no bitarray filter found for column value WoningType") + } + + ba, ok := BitArrays[column][bpi] + + if !ok { + return nil, errors.New("no bitarray filter found for column idx value WoningType") + } + + return ba, nil +} diff --git a/extras/config.yaml b/extras/config.yaml new file mode 100644 index 0000000..f970d83 --- /dev/null +++ b/extras/config.yaml @@ -0,0 +1,30 @@ +model: + adres: u + buurtcode: b + buurtnaam: r + gebruiksdoelen: r + gemeentecode: b + gemeentenaam: r + group_id_2020: u + index: '0' + labelscore_definitief: b + labelscore_voorlopig: b + numid: u + oppervlakte: u + p6_gas_aansluitingen_2020: u + p6_gasm3_2020: u + p6_kwh_2020: u + p6_totaal_pandoppervlak_m2: u + pand_bouwjaar: u + pand_gas_aansluitingen: u + pand_gas_ean_aansluitingen: u + pid: u + point: g + postcode: u + provinciecode: b + provincienaam: r + vid: u + wijkcode: b + wijknaam: r + woning_type: b + woningequivalent: u diff --git a/extras/create_model.py b/extras/create_model.py index 31a2177..623c1e2 100644 --- a/extras/create_model.py +++ b/extras/create_model.py @@ -22,19 +22,35 @@ import csv import sys +import os from re import sub from jinja2 import Environment, FileSystemLoader +import yaml + + if '-f' in sys.argv: filename = str(sys.argv[sys.argv.index('-f')+1]) else: filename = "items.csv" +if '-c' in sys.argv: + config = str(sys.argv[sys.argv.index('-c')+1]) +else: + config = "config.yaml" + with open(filename) as f: reader = csv.DictReader(f) row = dict(next(reader)) +cfg = {} + +if os.path.isfile(config): + with open(config, 'r') as stream: + cfg = yaml.load(stream)['model'] + + env = Environment( loader=FileSystemLoader('./templates'), ) @@ -73,13 
+89,21 @@ def gocamelCase(string): options = ['r', 'u', 'i', 'g', 'b'] while True: - # keep asking for valid input - q1 = ( - "(R)epeated value? has less then (2^16=65536) option.", - "(B)itarray, repeated column optimized for fast match.", - "(U)nique, (G)eo lat/lon point or (I)gnore ? r/b/u/g/i?." - ) - action = input(f"idx:{index} is {k} {q1}") # noqa + + action = None + + if cfg.get(k): + print(f"reading from config {k} {cfg[k]}") + action = cfg[k] + else: + # keep asking for valid input + q1 = ( + "(R)epeated value? has less then (2^16=65536) option.", + "(B)itarray, repeated column optimized for fast match.", + "(U)nique, (G)eo lat/lon point or (I)gnore ? r/b/u/g/i?." + ) + action = input(f"idx:{index} is {k} {q1}") # noqa + if action == '': print(f"pick one from {options}") continue @@ -87,6 +111,8 @@ def gocamelCase(string): continue break + cfg[k] = action + if action == 'r': repeated.append(kc) repeated_org.append(k) @@ -117,8 +143,15 @@ def gocamelCase(string): # ask for a index column while True: + index = None # keep asking for valid input - index = input(f"which column is idx? 0 - {len(allcolumns) - 1} ") + if cfg.get('index'): + index = cfg['index'] + else: + index = input(f"which column is idx? 
0 - {len(allcolumns) - 1} ") + + cfg['index'] = index + try: index = int(index) @@ -134,24 +167,41 @@ def gocamelCase(string): print('try again..') +# save answers in config file +with open(config, 'w') as f: + dict_file = {'model': cfg} + yaml.dump(dict_file, f) + print(f'saved answers in config {config}') + + # setup initial data structs for each repeated column initRepeatColumns = [] -initColumntemplate = env.get_template('initColumn.template.jinja2') +repeatColumnNames = [] +loadRepeatColumnNames = [] -for c in repeated: - initRepeatColumns.append( - initColumntemplate.render( - columnName=c, bitarraymap=c in bitarray) - ) +for columnName in repeated: + initRow = f"\t {columnName} = NewReapeatedColumn()\n" + initRepeatColumns.append(initRow) + + repeatRow = f"\t {columnName} \n" + repeatColumnNames.append(repeatRow) + + loadRow = f"\t {columnName} = m.{columnName} \n" + loadRepeatColumnNames.append(loadRow) + + +# setup initial data structs for each bitarray column +initBitarrays = [] +for columnName in bitarray: + onerow = f"\t {columnName}Items = make(fieldItemsMap)\n" + initBitarrays.append(onerow) # create bitarrays with item labels for column values. 
bitArrayStores = [] -bitArrayGetters = [] -bitArrayStoreTemplate = env.get_template('storebitarray.template.jinja2') -bitArrayGetTemplate = env.get_template('bitarrayGetter.template.jinja2') -for r in bitarray: - bitArrayStores.append(bitArrayStoreTemplate.render(columnName=r)) - bitArrayGetters.append(bitArrayGetTemplate.render(columnName=r)) +for c1, c2 in zip(bitarray, bitarray_org): + onerow = f'\tSetBitArray("c2", i.{c1}, i.Label)\n' + bitArrayStores.append(onerow) + # create ItemFull struct fields columnsItemIn = [] @@ -190,10 +240,11 @@ def gocamelCase(string): shrinkItems = [] shrinkvartemplate = env.get_template('shrinkVars.jinja2') shrinktemplate = env.get_template('shrinkColumn.jinja2') + for c in repeated: shrinkVars.append( shrinkvartemplate.render(column=c, bitarray=c in bitarray)) - shrinkItems.append(shrinktemplate.render(column=c)) + shrinkItems.append(f"\t {c}.Store(i.{c})\n") # create the actual shrinked/expand Item fields. @@ -207,9 +258,9 @@ def gocamelCase(string): if c in repeated: # string to unint - shrinkItemFields.append(f"\t\t{c}IdxMap[i.{c}],\n") + shrinkItemFields.append(f"\t\t{c}.GetIndex(i.{c}),\n") # unint back to string - expandItemFields.append(f"\t\t{c}[i.{c}],\n") + expandItemFields.append(f"\t\t{c}.GetValue(i.{c}),\n") else: shrinkItemFields.append(f"\t\ti.{c},\n") expandItemFields.append(f"\t\ti.{c},\n") @@ -280,6 +331,7 @@ def gocamelCase(string): # Finally render the model.go template modeltemplate = env.get_template('model.template.jinja2') +mapstemplate = env.get_template('modelmap.template.jinja2') geometryGetter = '""' print('GEOCOLUMNS: ' + " ".join(geocolumns)) @@ -287,11 +339,11 @@ def gocamelCase(string): geometryGetter = f"Getters{geocolumns[0]}(&i)" output = modeltemplate.render( - initRepeatColumns=''.join(initRepeatColumns), + #initRepeatColumns=''.join(initRepeatColumns), columnsItemIn=''.join(columnsItemIn), columnsItemOut=''.join(columnsItemOut), columnsItem=''.join(columnsItem), - 
shrinkVars=''.join(shrinkVars), + # shrinkVars=''.join(shrinkVars), shrinkItems=''.join(shrinkItems), shrinkItemFields=''.join(shrinkItemFields), expandItemFields=''.join(expandItemFields), @@ -304,12 +356,27 @@ def gocamelCase(string): indexcolumn=allcolumns[index], geometryGetter=geometryGetter, bitArrayStores=''.join(bitArrayStores), - bitArrayGetters=''.join(bitArrayGetters), ) f = open('model.go', 'w') f.write(output) f.close() - print('saved in model.go') print('!!NOTE!! edit the default search filter') + + +mapsoutput = mapstemplate.render( + initRepeatColumns=''.join(initRepeatColumns), + repeatColumnNames = ''.join(repeatColumnNames), + loadRepeatColumnNames = ''.join(loadRepeatColumnNames), + initBitarrays=''.join(initBitarrays), + shrinkVars=''.join(shrinkVars), + +) + +f = open('modelmaps.go', 'w') +f.write(mapsoutput) +f.close() +print('model hashmaps saved in modelmaps.go') + + diff --git a/extras/templates/initColumn.template.jinja2 b/extras/templates/initColumn.template.jinja2 index 66a4e25..edf5b81 100644 --- a/extras/templates/initColumn.template.jinja2 +++ b/extras/templates/initColumn.template.jinja2 @@ -2,6 +2,4 @@ {{columnName}}Tracker = 0 {{columnName}}IdxMap = make(fieldIdxMap) {{columnName}} = make(fieldMapIdx) -{% if bitarraymap %} - {{columnName}}Items = make(fieldItemsMap) -{% endif %} + diff --git a/extras/templates/loadRepeatColumn.template.jinja2 b/extras/templates/loadRepeatColumn.template.jinja2 new file mode 100644 index 0000000..d460706 --- /dev/null +++ b/extras/templates/loadRepeatColumn.template.jinja2 @@ -0,0 +1,5 @@ + + {{columnName}}Tracker = m.{{columnName}}Tracker + {{columnName}}IdxMap = m.{{columnName}}IdxMap + {{columnName}} = m.{{columnName}} + diff --git a/extras/templates/model.template.jinja2 b/extras/templates/model.template.jinja2 index 0afb6c8..e974cca 100644 --- a/extras/templates/model.template.jinja2 +++ b/extras/templates/model.template.jinja2 @@ -46,38 +46,8 @@ type fieldIdxMap map[string]uint16 type 
fieldMapIdx map[uint16]string type fieldItemsMap map[uint16]bitarray.BitArray -// Column maps. -// Store for each non distinct/repeated column -// unit16 -> string map and -// string -> unit16 map -// track count of distinct values +type IndexMap[string]int64 -{{shrinkVars}} - -/* -var {columnname}Tracker uint16 -var {columnname}IdxMap fieldIdxMap -var {columnname} fieldMapIdx -var {columnname}Items fieldItemmap -*/ - -// item map lock -var lock = sync.RWMutex{} - -// bitArray Lock -var balock = sync.RWMutex{} - - -func init() { - - {{initRepeatColumns}} - - /* - labelscoredefinitiefTracker = 0 - labelscoredefinitiefIdxMap = make(fieldIdxMap) - labelscoredefinitief = make(fieldMapIdx) - */ -} {{itemStructs}} @@ -109,8 +79,8 @@ func (i Item) MarshalJSON() ([]byte, error) { // Shrink create smaller Item using uint16 func (i ItemIn) Shrink(label int) Item { - lock.Lock() - defer lock.Unlock() + // Hashmap lookup for index + IndexMap[i.{{indexcolumn}}] = label {{shrinkItems}} @@ -126,38 +96,10 @@ func (i ItemIn) Shrink(label int) Item { // Store selected columns in seperate map[columnvalue]bitarray // for gast item lookup func (i Item) StoreBitArrayColumns() { - - balock.Lock() - defer balock.Unlock() - - lock.RLock() - defer lock.RUnlock() - - {% if bitArrayStores is defined %} - var ba bitarray.BitArray - var ok bool - {{ bitArrayStores }} - - {% endif %} - - /* - // Column Buurtcode has byte arrays for - ba, ok = BuurtcodeItems[i.Buurtcode] - if !ok { - ba = bitarray.NewSparseBitArray() - BuurtcodeItems[i.Buurtcode] = ba - } - ba.SetBit(uint64(i.Label)) - */ - } func (i Item) Serialize() ItemOut { - - lock.RLock() - defer lock.RUnlock() - return ItemOut{ {{expandItemFields}} @@ -184,9 +126,6 @@ func (i ItemOut) Columns() []string { func (i Item) Row() []string { - lock.RLock() - defer lock.RUnlock() - return []string{ {{expandItemFields}} @@ -266,16 +205,12 @@ func validateRegisters() error { return nil } -{{bitArrayGetters}} - - func init() { RegisterFuncMap = 
make(registerFuncType) RegisterGroupBy = make(registerGroupByFunc) RegisterGetters = make(registerGettersMap) RegisterReduce = make(registerReduce) - RegisterBitArray = make(registerBitArray) // register search filter. //RegisterFuncMap["search"] = 'EDITYOURSELF' @@ -325,9 +260,6 @@ func createSort(items Items) sortLookup { func sortBy(items Items, sortingL []string) (Items, []string) { - lock.Lock() - defer lock.Unlock() - sortFuncs := createSort(items) for _, sortFuncName := range sortingL { diff --git a/extras/templates/modelmap.template.jinja2 b/extras/templates/modelmap.template.jinja2 new file mode 100644 index 0000000..acb1bbc --- /dev/null +++ b/extras/templates/modelmap.template.jinja2 @@ -0,0 +1,51 @@ +/* + Transforming ItemsIn -> Items -> ItemsOut + Where Items has column values ar integers to save memmory + maps are needed to restore integers back to the actual values. + those are generated and stored here. +*/ + +package main + +type ModelMaps struct { + + {{MappedColumns}} + +} + + +{{shrinkVars}} + + +func initBitarrays() { + +{{initBitarrays}} + +} + +func init() { + setUpRepeatedColumns() +} + + +func setUpRepeatedColumns() { + initBitarrays() + + {{initRepeatColumns}} + +} + + +func CreateMapstore() ModelMaps { + return ModelMaps{ + + {{repeatColumnNames}} + + } +} + + +func LoadMapstore(m ModelMaps) { + + {{loadRepeatColumnNames}} +} diff --git a/extras/templates/registerFilters.jinja2 b/extras/templates/registerFilters.jinja2 index 24f4264..52bba4b 100644 --- a/extras/templates/registerFilters.jinja2 +++ b/extras/templates/registerFilters.jinja2 @@ -5,7 +5,4 @@ RegisterFuncMap["startswith-{{co}}"] = Filter{{columnName}}StartsWith RegisterGetters["{{co}}"] = Getters{{columnName}} RegisterGroupBy["{{co}}"] = Getters{{columnName}} -{% if bitarray %} - RegisterBitArray["{{co}}"] = GetBitArray{{columnName}} -{% endif %} diff --git a/extras/templates/repeatColumn.template.jinja2 b/extras/templates/repeatColumn.template.jinja2 new file mode 100644 
index 0000000..a8a02fd --- /dev/null +++ b/extras/templates/repeatColumn.template.jinja2 @@ -0,0 +1,5 @@ + + {{columnName}}Tracker, + {{columnName}}IdxMap, + {{columnName}}, + diff --git a/extras/templates/shrinkVars.jinja2 b/extras/templates/shrinkVars.jinja2 index 11684a1..9dacdfc 100644 --- a/extras/templates/shrinkVars.jinja2 +++ b/extras/templates/shrinkVars.jinja2 @@ -1,8 +1,6 @@ -var {{column}}Tracker uint16 -var {{column}}IdxMap fieldIdxMap -var {{column}} fieldMapIdx +var {{column}} MappedColumn {% if bitarray %} -var {{column}}Items fieldItemsMap +var {{column}}Items fieldBitarrayMap {% endif %} diff --git a/geo.go b/geo.go index 2591eef..c454488 100644 --- a/geo.go +++ b/geo.go @@ -24,14 +24,14 @@ import ( "log" "sort" "strings" - "sync" + // "sync" ) var minLevel int var maxLevel int var maxCells int -var s2Lock = sync.RWMutex{} +// var s2Lock = sync.RWMutex{} type cellIndexNode struct { ID s2.CellID @@ -99,8 +99,8 @@ func (i Item) GeoIndex(label int) error { return fmt.Errorf("geom error") } - s2Lock.Lock() - defer s2Lock.Unlock() + // s2Lock.Lock() + // defer s2Lock.Unlock() y := p[0][0] x := p[0][1] diff --git a/http_handlers.go b/http_handlers.go index a164111..32b2b80 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -5,7 +5,7 @@ import ( "encoding/json" "fmt" "index/suffixarray" - //"io/ioutil" + // "io/ioutil" "log" "net/http" "runtime" @@ -425,6 +425,11 @@ func MIDDLEWARE(cors bool) func(http.Handler) http.Handler { return corsEnabled } + // make sure items are not being modified during request + // otherwise wait.. 
+ lock.RLock() + defer lock.RUnlock() + return passThrough } @@ -478,6 +483,7 @@ func contextTypeAheadRest(JWTConig jwtConfig, itemChan ItemsChannel, operations if column[len(column)-1] == '/' { column = column[:len(column)-1] } + /* if _, ok := operations.Getters[column]; !ok { w.WriteHeader(404) diff --git a/http_handlers_test.go b/http_handlers_test.go index c0a42be..f4800c5 100644 --- a/http_handlers_test.go +++ b/http_handlers_test.go @@ -65,19 +65,38 @@ func TestBasicHandlers(t *testing.T) { t.Error("no items") } - urls := []string{ - "/list/", - "/typeahead/pid/?search=1", - "/help/", + type testCase struct { + url string + expected string } - for i := range urls { - req := httptest.NewRequest("GET", urls[i], nil) + tests := []testCase{ + testCase{"/list/?search=1", "10"}, + testCase{"/typeahead/gemeentecode/?search=1", "2"}, + testCase{"/typeahead/pid/?search=1", "2"}, + testCase{"/help/", ""}, + } + + for i := range tests { + req := httptest.NewRequest("GET", tests[i].url, nil) w := httptest.NewRecorder() handler.ServeHTTP(w, req) resp := w.Result() if resp.StatusCode != 200 { - t.Errorf("request to %s failed", urls[i]) + t.Errorf("request to %s failed", tests[i].url) + t.Error(resp) + } + + if tests[i].expected == "" { + continue + } + + if resp.Header.Get("Total-Items") != tests[i].expected { + t.Errorf("total hits mismatch from %s %s != %s", + tests[i].url, + tests[i].expected, + resp.Header.Get("Total-Items"), + ) t.Error(resp) } } diff --git a/main.go b/main.go index c1e2072..041a6e3 100644 --- a/main.go +++ b/main.go @@ -54,9 +54,9 @@ func loadcsv(itemChan ItemsChannel) { // S2CELLS.Sort() fmt.Println("csv imported") + // Empty cache. 
should be made more generic cacheLock.Lock() defer cacheLock.Unlock() - GroupByBodyCache = make(map[string]GroupByResult) GroupByHeaderCache = make(map[string]HeaderData) } @@ -94,6 +94,8 @@ func defaultSettings() { func main() { + defaultSettings() + go ItemChanWorker(itemChan) if SETTINGS.Get("csv") != "" { diff --git a/model.go b/model.go index a6a8fcb..9bbebe0 100644 --- a/model.go +++ b/model.go @@ -28,7 +28,6 @@ package main import ( "encoding/json" "errors" - "log" "sort" "strconv" "strings" @@ -41,13 +40,10 @@ type registerGettersMap map[string]func(*Item) string type registerReduce map[string]func(Items) map[string]string type registerBitArray map[string]func(s string) (bitarray.BitArray, error) - -type fieldIdxMap map[string]uint16 -type fieldMapIdx map[uint16]string -type fieldItemsMap map[uint16]bitarray.BitArray +type fieldBitarrayMap map[uint32]bitarray.BitArray func init() { - setUpMaps() + setUpRepeatedColumns() } type ItemIn struct { @@ -121,27 +117,27 @@ type Item struct { Oppervlakte string Woningequivalent string Adres string - WoningType uint16 - LabelscoreVoorlopig uint16 - LabelscoreDefinitief uint16 - Gemeentecode uint16 - Gemeentenaam uint16 - Buurtcode uint16 - Buurtnaam uint16 - Wijkcode uint16 - Wijknaam uint16 - Provinciecode uint16 - Provincienaam uint16 + WoningType uint32 + LabelscoreVoorlopig uint32 + LabelscoreDefinitief uint32 + Gemeentecode uint32 + Gemeentenaam uint32 + Buurtcode uint32 + Buurtnaam uint32 + Wijkcode uint32 + Wijknaam uint32 + Provinciecode uint32 + Provincienaam uint32 Point string - PandGasEanAansluitingen uint16 + PandGasEanAansluitingen uint32 GroupId2020 string - P6GasAansluitingen2020 uint16 - P6Gasm32020 uint16 - P6Kwh2020 uint16 + P6GasAansluitingen2020 uint32 + P6Gasm32020 uint32 + P6Kwh2020 uint32 P6TotaalPandoppervlakM2 string - PandBouwjaar uint16 - PandGasAansluitingen uint16 - Gebruiksdoelen []uint16 + PandBouwjaar uint32 + PandGasAansluitingen uint32 + Gebruiksdoelen []uint32 } func (i Item) 
MarshalJSON() ([]byte, error) { @@ -151,235 +147,25 @@ func (i Item) MarshalJSON() ([]byte, error) { // Shrink create smaller Item using uint16 func (i ItemIn) Shrink(label int) Item { - lock.Lock() - defer lock.Unlock() - - //check if column value is already present - //else store new key - if _, ok := WoningTypeIdxMap[i.WoningType]; !ok { - // store WoningType in map at current index of tracker - WoningType[WoningTypeTracker] = i.WoningType - // store key - idx - WoningTypeIdxMap[i.WoningType] = WoningTypeTracker - // increase tracker - WoningTypeTracker += 1 - } - - //check if column value is already present - //else store new key - if _, ok := LabelscoreVoorlopigIdxMap[i.LabelscoreVoorlopig]; !ok { - // store LabelscoreVoorlopig in map at current index of tracker - LabelscoreVoorlopig[LabelscoreVoorlopigTracker] = i.LabelscoreVoorlopig - // store key - idx - LabelscoreVoorlopigIdxMap[i.LabelscoreVoorlopig] = LabelscoreVoorlopigTracker - // increase tracker - LabelscoreVoorlopigTracker += 1 - } - - //check if column value is already present - //else store new key - if _, ok := LabelscoreDefinitiefIdxMap[i.LabelscoreDefinitief]; !ok { - // store LabelscoreDefinitief in map at current index of tracker - LabelscoreDefinitief[LabelscoreDefinitiefTracker] = i.LabelscoreDefinitief - // store key - idx - LabelscoreDefinitiefIdxMap[i.LabelscoreDefinitief] = LabelscoreDefinitiefTracker - // increase tracker - LabelscoreDefinitiefTracker += 1 - } - - //check if column value is already present - //else store new key - if _, ok := GemeentecodeIdxMap[i.Gemeentecode]; !ok { - // store Gemeentecode in map at current index of tracker - Gemeentecode[GemeentecodeTracker] = i.Gemeentecode - // store key - idx - GemeentecodeIdxMap[i.Gemeentecode] = GemeentecodeTracker - // increase tracker - GemeentecodeTracker += 1 - } - - //check if column value is already present - //else store new key - if _, ok := GemeentenaamIdxMap[i.Gemeentenaam]; !ok { - // store Gemeentenaam in map at 
current index of tracker - Gemeentenaam[GemeentenaamTracker] = i.Gemeentenaam - // store key - idx - GemeentenaamIdxMap[i.Gemeentenaam] = GemeentenaamTracker - // increase tracker - GemeentenaamTracker += 1 - } - - //check if column value is already present - //else store new key - if _, ok := BuurtcodeIdxMap[i.Buurtcode]; !ok { - // store Buurtcode in map at current index of tracker - Buurtcode[BuurtcodeTracker] = i.Buurtcode - // store key - idx - BuurtcodeIdxMap[i.Buurtcode] = BuurtcodeTracker - // increase tracker - BuurtcodeTracker += 1 - } - - //check if column value is already present - //else store new key - if _, ok := BuurtnaamIdxMap[i.Buurtnaam]; !ok { - // store Buurtnaam in map at current index of tracker - Buurtnaam[BuurtnaamTracker] = i.Buurtnaam - // store key - idx - BuurtnaamIdxMap[i.Buurtnaam] = BuurtnaamTracker - // increase tracker - BuurtnaamTracker += 1 - } - - //check if column value is already present - //else store new key - if _, ok := WijkcodeIdxMap[i.Wijkcode]; !ok { - // store Wijkcode in map at current index of tracker - Wijkcode[WijkcodeTracker] = i.Wijkcode - // store key - idx - WijkcodeIdxMap[i.Wijkcode] = WijkcodeTracker - // increase tracker - WijkcodeTracker += 1 - } - - //check if column value is already present - //else store new key - if _, ok := WijknaamIdxMap[i.Wijknaam]; !ok { - // store Wijknaam in map at current index of tracker - Wijknaam[WijknaamTracker] = i.Wijknaam - // store key - idx - WijknaamIdxMap[i.Wijknaam] = WijknaamTracker - // increase tracker - WijknaamTracker += 1 - } - - //check if column value is already present - //else store new key - if _, ok := ProvinciecodeIdxMap[i.Provinciecode]; !ok { - // store Provinciecode in map at current index of tracker - Provinciecode[ProvinciecodeTracker] = i.Provinciecode - // store key - idx - ProvinciecodeIdxMap[i.Provinciecode] = ProvinciecodeTracker - // increase tracker - ProvinciecodeTracker += 1 - } - - //check if column value is already present - //else store 
new key - if _, ok := ProvincienaamIdxMap[i.Provincienaam]; !ok { - // store Provincienaam in map at current index of tracker - Provincienaam[ProvincienaamTracker] = i.Provincienaam - // store key - idx - ProvincienaamIdxMap[i.Provincienaam] = ProvincienaamTracker - // increase tracker - ProvincienaamTracker += 1 - } - - //check if column value is already present - //else store new key - if _, ok := PandGasEanAansluitingenIdxMap[i.PandGasEanAansluitingen]; !ok { - // store PandGasEanAansluitingen in map at current index of tracker - PandGasEanAansluitingen[PandGasEanAansluitingenTracker] = i.PandGasEanAansluitingen - // store key - idx - PandGasEanAansluitingenIdxMap[i.PandGasEanAansluitingen] = PandGasEanAansluitingenTracker - // increase tracker - PandGasEanAansluitingenTracker += 1 - } - - //check if column value is already present - //else store new key - if _, ok := P6GasAansluitingen2020IdxMap[i.P6GasAansluitingen2020]; !ok { - // store P6GasAansluitingen2020 in map at current index of tracker - P6GasAansluitingen2020[P6GasAansluitingen2020Tracker] = i.P6GasAansluitingen2020 - // store key - idx - P6GasAansluitingen2020IdxMap[i.P6GasAansluitingen2020] = P6GasAansluitingen2020Tracker - // increase tracker - P6GasAansluitingen2020Tracker += 1 - } - - //check if column value is already present - //else store new key - if _, ok := P6Gasm32020IdxMap[i.P6Gasm32020]; !ok { - // store P6Gasm32020 in map at current index of tracker - P6Gasm32020[P6Gasm32020Tracker] = i.P6Gasm32020 - // store key - idx - P6Gasm32020IdxMap[i.P6Gasm32020] = P6Gasm32020Tracker - // increase tracker - P6Gasm32020Tracker += 1 - } - - //check if column value is already present - //else store new key - if _, ok := P6Kwh2020IdxMap[i.P6Kwh2020]; !ok { - // store P6Kwh2020 in map at current index of tracker - P6Kwh2020[P6Kwh2020Tracker] = i.P6Kwh2020 - // store key - idx - P6Kwh2020IdxMap[i.P6Kwh2020] = P6Kwh2020Tracker - // increase tracker - P6Kwh2020Tracker += 1 - } - - //check if column 
value is already present - //else store new key - if _, ok := PandBouwjaarIdxMap[i.PandBouwjaar]; !ok { - // store PandBouwjaar in map at current index of tracker - PandBouwjaar[PandBouwjaarTracker] = i.PandBouwjaar - // store key - idx - PandBouwjaarIdxMap[i.PandBouwjaar] = PandBouwjaarTracker - // increase tracker - PandBouwjaarTracker += 1 - } - - //check if column value is already present - //else store new key - if _, ok := PandGasAansluitingenIdxMap[i.PandGasAansluitingen]; !ok { - // store PandGasAansluitingen in map at current index of tracker - PandGasAansluitingen[PandGasAansluitingenTracker] = i.PandGasAansluitingen - // store key - idx - PandGasAansluitingenIdxMap[i.PandGasAansluitingen] = PandGasAansluitingenTracker - // increase tracker - PandGasAansluitingenTracker += 1 - } - - //check if column value is already present - //else store new key - if _, ok := GebruiksdoelenIdxMap[i.Gebruiksdoelen]; !ok { - // store Gebruiksdoelen in map at current index of tracker - Gebruiksdoelen[GebruiksdoelenTracker] = i.Gebruiksdoelen - // store key - idx - GebruiksdoelenIdxMap[i.Gebruiksdoelen] = GebruiksdoelenTracker - // increase tracker - GebruiksdoelenTracker += 1 - } - - //check if column value is already present - //else store new key - doelen := make([]uint16, 0) - - // parsing {a, b} array values - // string should be at least 2 example "{}" == size 2 - if len(i.Gebruiksdoelen) > 2 { - - gebruiksdoelen, err := ParsePGArray(i.Gebruiksdoelen) - if err != nil { - log.Fatal(err, "error parsing array ") - } - - for _, gd := range gebruiksdoelen { - if _, ok := GebruiksdoelenIdxMap[gd]; !ok { - // store Gebruiksdoelen in map at current index of tracker - Gebruiksdoelen[GebruiksdoelenTracker] = gd - // store key - idx - GebruiksdoelenIdxMap[gd] = GebruiksdoelenTracker - // increase tracker - GebruiksdoelenTracker += 1 - } - } - - for _, v := range gebruiksdoelen { - doelen = append(doelen, GebruiksdoelenIdxMap[v]) - } - } + WoningType.Store(i.WoningType) + 
LabelscoreVoorlopig.Store(i.LabelscoreVoorlopig) + LabelscoreDefinitief.Store(i.LabelscoreDefinitief) + Gemeentecode.Store(i.Gemeentecode) + Gemeentenaam.Store(i.Gemeentenaam) + Buurtcode.Store(i.Buurtcode) + Buurtnaam.Store(i.Buurtnaam) + Wijkcode.Store(i.Wijkcode) + Wijknaam.Store(i.Wijknaam) + Provinciecode.Store(i.Provinciecode) + Provincienaam.Store(i.Provincienaam) + PandGasAansluitingen.Store(i.PandGasEanAansluitingen) + P6GasAansluitingen2020.Store(i.P6GasAansluitingen2020) + P6Gasm32020.Store(i.P6Gasm32020) + P6Kwh2020.Store(i.P6Kwh2020) + PandBouwjaar.Store(i.PandBouwjaar) + PandGasAansluitingen.Store(i.PandGasAansluitingen) + + doelen := Gebruiksdoelen.StoreArray(i.Gebruiksdoelen) return Item{ @@ -392,113 +178,43 @@ func (i ItemIn) Shrink(label int) Item { i.Oppervlakte, i.Woningequivalent, i.Adres, - WoningTypeIdxMap[i.WoningType], - LabelscoreVoorlopigIdxMap[i.LabelscoreVoorlopig], - LabelscoreDefinitiefIdxMap[i.LabelscoreDefinitief], - GemeentecodeIdxMap[i.Gemeentecode], - GemeentenaamIdxMap[i.Gemeentenaam], - BuurtcodeIdxMap[i.Buurtcode], - BuurtnaamIdxMap[i.Buurtnaam], - WijkcodeIdxMap[i.Wijkcode], - WijknaamIdxMap[i.Wijknaam], - ProvinciecodeIdxMap[i.Provinciecode], - ProvincienaamIdxMap[i.Provincienaam], + WoningType.GetIndex(i.WoningType), + LabelscoreVoorlopig.GetIndex(i.LabelscoreVoorlopig), + LabelscoreDefinitief.GetIndex(i.LabelscoreDefinitief), + Gemeentecode.GetIndex(i.Gemeentecode), + Gemeentenaam.GetIndex(i.Gemeentenaam), + Buurtcode.GetIndex(i.Buurtcode), + Buurtnaam.GetIndex(i.Buurtnaam), + Wijkcode.GetIndex(i.Wijkcode), + Wijknaam.GetIndex(i.Wijknaam), + Provinciecode.GetIndex(i.Provinciecode), + Provincienaam.GetIndex(i.Provincienaam), i.Point, - PandGasEanAansluitingenIdxMap[i.PandGasEanAansluitingen], + PandGasEanAansluitingen.GetIndex(i.PandGasEanAansluitingen), i.GroupId2020, - P6GasAansluitingen2020IdxMap[i.P6GasAansluitingen2020], - P6Gasm32020IdxMap[i.P6Gasm32020], - P6Kwh2020IdxMap[i.P6Kwh2020], + 
P6GasAansluitingen2020.GetIndex(i.P6GasAansluitingen2020), + P6Gasm32020.GetIndex(i.P6Gasm32020), + P6Kwh2020.GetIndex(i.P6Kwh2020), i.P6TotaalPandoppervlakM2, - PandBouwjaarIdxMap[i.PandBouwjaar], - PandGasAansluitingenIdxMap[i.PandGasAansluitingen], + PandBouwjaar.GetIndex(i.PandBouwjaar), + PandGasAansluitingen.GetIndex(i.PandGasAansluitingen), doelen, } } // Store selected columns in seperate map[columnvalue]bitarray -// for gast item lookup +// for fast item selection +// BitArrays cannot be serialized func (i Item) StoreBitArrayColumns() { - var ba bitarray.BitArray - var ok bool - - // Column WoningType has byte arrays for - ba, ok = WoningTypeItems[i.WoningType] - if !ok { - ba = bitarray.NewSparseBitArray() - WoningTypeItems[i.WoningType] = ba - } - - ba.SetBit(uint64(i.Label)) - // Column LabelscoreVoorlopig has byte arrays for - ba, ok = LabelscoreVoorlopigItems[i.LabelscoreVoorlopig] - if !ok { - ba = bitarray.NewSparseBitArray() - LabelscoreVoorlopigItems[i.LabelscoreVoorlopig] = ba - } - - ba.SetBit(uint64(i.Label)) - // Column LabelscoreDefinitief has byte arrays for - ba, ok = LabelscoreDefinitiefItems[i.LabelscoreDefinitief] - if !ok { - ba = bitarray.NewSparseBitArray() - LabelscoreDefinitiefItems[i.LabelscoreDefinitief] = ba - } - - ba.SetBit(uint64(i.Label)) - // Column Gemeentecode has byte arrays for - ba, ok = GemeentecodeItems[i.Gemeentecode] - if !ok { - ba = bitarray.NewSparseBitArray() - GemeentecodeItems[i.Gemeentecode] = ba - } - - ba.SetBit(uint64(i.Label)) - // Column Buurtcode has byte arrays for - ba, ok = BuurtcodeItems[i.Buurtcode] - if !ok { - ba = bitarray.NewSparseBitArray() - BuurtcodeItems[i.Buurtcode] = ba - } - - ba.SetBit(uint64(i.Label)) - // Column Wijkcode has byte arrays for - ba, ok = WijkcodeItems[i.Wijkcode] - if !ok { - ba = bitarray.NewSparseBitArray() - WijkcodeItems[i.Wijkcode] = ba - } - - ba.SetBit(uint64(i.Label)) - // Column Provinciecode has byte arrays for - ba, ok = ProvinciecodeItems[i.Provinciecode] - 
if !ok { - ba = bitarray.NewSparseBitArray() - ProvinciecodeItems[i.Provinciecode] = ba - } - - ba.SetBit(uint64(i.Label)) - - /* - // Column Buurtcode has byte arrays for - ba, ok = BuurtcodeItems[i.Buurtcode] - if !ok { - ba = bitarray.NewSparseBitArray() - BuurtcodeItems[i.Buurtcode] = ba - } - ba.SetBit(uint64(i.Label)) - */ - + SetBitArray("woning_type", i.WoningType, i.Label) + SetBitArray("labelscore_voorlopig", i.LabelscoreVoorlopig, i.Label) + SetBitArray("buurtcode", i.Buurtcode, i.Label) } func (i Item) Serialize() ItemOut { - lock.RLock() - defer lock.RUnlock() - return ItemOut{ - i.Pid, i.Vid, i.Numid, @@ -506,26 +222,26 @@ func (i Item) Serialize() ItemOut { i.Oppervlakte, i.Woningequivalent, i.Adres, - WoningType[i.WoningType], - LabelscoreVoorlopig[i.LabelscoreVoorlopig], - LabelscoreDefinitief[i.LabelscoreDefinitief], - Gemeentecode[i.Gemeentecode], - Gemeentenaam[i.Gemeentenaam], - Buurtcode[i.Buurtcode], - Buurtnaam[i.Buurtnaam], - Wijkcode[i.Wijkcode], - Wijknaam[i.Wijknaam], - Provinciecode[i.Provinciecode], - Provincienaam[i.Provincienaam], + WoningType.GetValue(i.WoningType), + LabelscoreVoorlopig.GetValue(i.LabelscoreVoorlopig), + LabelscoreDefinitief.GetValue(i.LabelscoreDefinitief), + Gemeentecode.GetValue(i.Gemeentecode), + Gemeentenaam.GetValue(i.Gemeentenaam), + Buurtcode.GetValue(i.Buurtcode), + Buurtnaam.GetValue(i.Buurtnaam), + Wijkcode.GetValue(i.Wijkcode), + Wijknaam.GetValue(i.Wijknaam), + Provinciecode.GetValue(i.Provinciecode), + Provincienaam.GetValue(i.Provincienaam), i.Point, - PandGasEanAansluitingen[i.PandGasEanAansluitingen], + PandGasEanAansluitingen.GetValue(i.PandGasEanAansluitingen), i.GroupId2020, - P6GasAansluitingen2020[i.P6GasAansluitingen2020], - P6Gasm32020[i.P6Gasm32020], - P6Kwh2020[i.P6Kwh2020], + P6GasAansluitingen2020.GetValue(i.P6GasAansluitingen2020), + P6Gasm32020.GetValue(i.P6Gasm32020), + P6Kwh2020.GetValue(i.P6Kwh2020), i.P6TotaalPandoppervlakM2, - PandBouwjaar[i.PandBouwjaar], - 
PandGasAansluitingen[i.PandGasAansluitingen], + PandBouwjaar.GetValue(i.PandBouwjaar), + PandGasAansluitingen.GetValue(i.PandGasAansluitingen), GettersGebruiksdoelen(&i), } } @@ -600,9 +316,6 @@ func (i ItemOut) Columns() []string { func (i Item) Row() []string { - lock.RLock() - defer lock.RUnlock() - return []string{ i.Pid, @@ -612,26 +325,26 @@ func (i Item) Row() []string { i.Oppervlakte, i.Woningequivalent, i.Adres, - WoningType[i.WoningType], - LabelscoreVoorlopig[i.LabelscoreVoorlopig], - LabelscoreDefinitief[i.LabelscoreDefinitief], - Gemeentecode[i.Gemeentecode], - Gemeentenaam[i.Gemeentenaam], - Buurtcode[i.Buurtcode], - Buurtnaam[i.Buurtnaam], - Wijkcode[i.Wijkcode], - Wijknaam[i.Wijknaam], - Provinciecode[i.Provinciecode], - Provincienaam[i.Provincienaam], + WoningType.GetValue(i.WoningType), + LabelscoreVoorlopig.GetValue(i.LabelscoreVoorlopig), + LabelscoreDefinitief.GetValue(i.LabelscoreDefinitief), + Gemeentecode.GetValue(i.Gemeentecode), + Gemeentenaam.GetValue(i.Gemeentenaam), + Buurtcode.GetValue(i.Buurtcode), + Buurtnaam.GetValue(i.Buurtnaam), + Wijkcode.GetValue(i.Wijkcode), + Wijknaam.GetValue(i.Wijknaam), + Provinciecode.GetValue(i.Provinciecode), + Provincienaam.GetValue(i.Provincienaam), i.Point, - PandGasEanAansluitingen[i.PandGasEanAansluitingen], + PandGasEanAansluitingen.GetValue(i.PandGasEanAansluitingen), i.GroupId2020, - P6GasAansluitingen2020[i.P6GasAansluitingen2020], - P6Gasm32020[i.P6Gasm32020], - P6Kwh2020[i.P6Kwh2020], + P6GasAansluitingen2020.GetValue(i.P6GasAansluitingen2020), + P6Gasm32020.GetValue(i.P6Gasm32020), + P6Kwh2020.GetValue(i.P6Kwh2020), i.P6TotaalPandoppervlakM2, - PandBouwjaar[i.PandBouwjaar], - PandGasAansluitingen[i.PandGasAansluitingen], + PandBouwjaar.GetValue(i.PandBouwjaar), + PandGasAansluitingen.GetValue(i.PandGasAansluitingen), GettersGebruiksdoelen(&i), } } @@ -786,222 +499,222 @@ func GettersAdres(i *Item) string { // contain filter WoningType func FilterWoningTypeContains(i *Item, s string) bool { - 
return strings.Contains(WoningType[i.WoningType], s) + return strings.Contains(WoningType.GetValue(i.WoningType), s) } // startswith filter WoningType func FilterWoningTypeStartsWith(i *Item, s string) bool { - return strings.HasPrefix(WoningType[i.WoningType], s) + return strings.HasPrefix(WoningType.GetValue(i.WoningType), s) } // match filters WoningType func FilterWoningTypeMatch(i *Item, s string) bool { - return WoningType[i.WoningType] == s + return WoningType.GetValue(i.WoningType) == s } // getter WoningType func GettersWoningType(i *Item) string { - return WoningType[i.WoningType] + return WoningType.GetValue(i.WoningType) } // contain filter LabelscoreVoorlopig func FilterLabelscoreVoorlopigContains(i *Item, s string) bool { - return strings.Contains(LabelscoreVoorlopig[i.LabelscoreVoorlopig], s) + return strings.Contains(LabelscoreVoorlopig.GetValue(i.LabelscoreVoorlopig), s) } // startswith filter LabelscoreVoorlopig func FilterLabelscoreVoorlopigStartsWith(i *Item, s string) bool { - return strings.HasPrefix(LabelscoreVoorlopig[i.LabelscoreVoorlopig], s) + return strings.HasPrefix(LabelscoreVoorlopig.GetValue(i.LabelscoreVoorlopig), s) } // match filters LabelscoreVoorlopig func FilterLabelscoreVoorlopigMatch(i *Item, s string) bool { - return LabelscoreVoorlopig[i.LabelscoreVoorlopig] == s + return LabelscoreVoorlopig.GetValue(i.LabelscoreVoorlopig) == s } // getter LabelscoreVoorlopig func GettersLabelscoreVoorlopig(i *Item) string { - return LabelscoreVoorlopig[i.LabelscoreVoorlopig] + return LabelscoreVoorlopig.GetValue(i.LabelscoreVoorlopig) } // contain filter LabelscoreDefinitief func FilterLabelscoreDefinitiefContains(i *Item, s string) bool { - return strings.Contains(LabelscoreDefinitief[i.LabelscoreDefinitief], s) + return strings.Contains(LabelscoreDefinitief.GetValue(i.LabelscoreDefinitief), s) } // startswith filter LabelscoreDefinitief func FilterLabelscoreDefinitiefStartsWith(i *Item, s string) bool { - return 
strings.HasPrefix(LabelscoreDefinitief[i.LabelscoreDefinitief], s) + return strings.HasPrefix(LabelscoreDefinitief.GetValue(i.LabelscoreDefinitief), s) } // match filters LabelscoreDefinitief func FilterLabelscoreDefinitiefMatch(i *Item, s string) bool { - return LabelscoreDefinitief[i.LabelscoreDefinitief] == s + return LabelscoreDefinitief.GetValue(i.LabelscoreDefinitief) == s } // getter LabelscoreDefinitief func GettersLabelscoreDefinitief(i *Item) string { - return LabelscoreDefinitief[i.LabelscoreDefinitief] + return LabelscoreDefinitief.GetValue(i.LabelscoreDefinitief) } // contain filter Gemeentecode func FilterGemeentecodeContains(i *Item, s string) bool { - return strings.Contains(Gemeentecode[i.Gemeentecode], s) + return strings.Contains(Gemeentecode.GetValue(i.Gemeentecode), s) } // startswith filter Gemeentecode func FilterGemeentecodeStartsWith(i *Item, s string) bool { - return strings.HasPrefix(Gemeentecode[i.Gemeentecode], s) + return strings.HasPrefix(Gemeentecode.GetValue(i.Gemeentecode), s) } // match filters Gemeentecode func FilterGemeentecodeMatch(i *Item, s string) bool { - return Gemeentecode[i.Gemeentecode] == s + return Gemeentecode.GetValue(i.Gemeentecode) == s } // getter Gemeentecode func GettersGemeentecode(i *Item) string { - return Gemeentecode[i.Gemeentecode] + return Gemeentecode.GetValue(i.Gemeentecode) } // contain filter Gemeentenaam func FilterGemeentenaamContains(i *Item, s string) bool { - return strings.Contains(Gemeentenaam[i.Gemeentenaam], s) + return strings.Contains(Gemeentenaam.GetValue(i.Gemeentenaam), s) } // startswith filter Gemeentenaam func FilterGemeentenaamStartsWith(i *Item, s string) bool { - return strings.HasPrefix(Gemeentenaam[i.Gemeentenaam], s) + return strings.HasPrefix(Gemeentenaam.GetValue(i.Gemeentenaam), s) } // match filters Gemeentenaam func FilterGemeentenaamMatch(i *Item, s string) bool { - return Gemeentenaam[i.Gemeentenaam] == s + return Gemeentenaam.GetValue(i.Gemeentenaam) == s } // getter 
Gemeentenaam func GettersGemeentenaam(i *Item) string { - return Gemeentenaam[i.Gemeentenaam] + return Gemeentenaam.GetValue(i.Gemeentenaam) } // contain filter Buurtcode func FilterBuurtcodeContains(i *Item, s string) bool { - return strings.Contains(Buurtcode[i.Buurtcode], s) + return strings.Contains(Buurtcode.GetValue(i.Buurtcode), s) } // startswith filter Buurtcode func FilterBuurtcodeStartsWith(i *Item, s string) bool { - return strings.HasPrefix(Buurtcode[i.Buurtcode], s) + return strings.HasPrefix(Buurtcode.GetValue(i.Buurtcode), s) } // match filters Buurtcode func FilterBuurtcodeMatch(i *Item, s string) bool { - return Buurtcode[i.Buurtcode] == s + return Buurtcode.GetValue(i.Buurtcode) == s } // getter Buurtcode func GettersBuurtcode(i *Item) string { - return Buurtcode[i.Buurtcode] + return Buurtcode.GetValue(i.Buurtcode) } // contain filter Buurtnaam func FilterBuurtnaamContains(i *Item, s string) bool { - return strings.Contains(Buurtnaam[i.Buurtnaam], s) + return strings.Contains(Buurtnaam.GetValue(i.Buurtnaam), s) } // startswith filter Buurtnaam func FilterBuurtnaamStartsWith(i *Item, s string) bool { - return strings.HasPrefix(Buurtnaam[i.Buurtnaam], s) + return strings.HasPrefix(Buurtnaam.GetValue(i.Buurtnaam), s) } // match filters Buurtnaam func FilterBuurtnaamMatch(i *Item, s string) bool { - return Buurtnaam[i.Buurtnaam] == s + return Buurtnaam.GetValue(i.Buurtnaam) == s } // getter Buurtnaam func GettersBuurtnaam(i *Item) string { - return Buurtnaam[i.Buurtnaam] + return Buurtnaam.GetValue(i.Buurtnaam) } // contain filter Wijkcode func FilterWijkcodeContains(i *Item, s string) bool { - return strings.Contains(Wijkcode[i.Wijkcode], s) + return strings.Contains(Wijkcode.GetValue(i.Wijkcode), s) } // startswith filter Wijkcode func FilterWijkcodeStartsWith(i *Item, s string) bool { - return strings.HasPrefix(Wijkcode[i.Wijkcode], s) + return strings.HasPrefix(Wijkcode.GetValue(i.Wijkcode), s) } // match filters Wijkcode func 
FilterWijkcodeMatch(i *Item, s string) bool { - return Wijkcode[i.Wijkcode] == s + return i.Wijkcode == Wijkcode.GetIndex(s) } // getter Wijkcode func GettersWijkcode(i *Item) string { - return Wijkcode[i.Wijkcode] + return Wijkcode.GetValue(i.Wijkcode) } // contain filter Wijknaam func FilterWijknaamContains(i *Item, s string) bool { - return strings.Contains(Wijknaam[i.Wijknaam], s) + return strings.Contains(Wijknaam.GetValue(i.Wijknaam), s) } // startswith filter Wijknaam func FilterWijknaamStartsWith(i *Item, s string) bool { - return strings.HasPrefix(Wijknaam[i.Wijknaam], s) + return strings.HasPrefix(Wijknaam.GetValue(i.Wijknaam), s) } // match filters Wijknaam func FilterWijknaamMatch(i *Item, s string) bool { - return Wijknaam[i.Wijknaam] == s + return Wijknaam.GetIndex(s) == i.Wijknaam } // getter Wijknaam func GettersWijknaam(i *Item) string { - return Wijknaam[i.Wijknaam] + return Wijknaam.GetValue(i.Wijknaam) } // contain filter Provinciecode func FilterProvinciecodeContains(i *Item, s string) bool { - return strings.Contains(Provinciecode[i.Provinciecode], s) + return strings.Contains(Provinciecode.GetValue(i.Provinciecode), s) } // startswith filter Provinciecode func FilterProvinciecodeStartsWith(i *Item, s string) bool { - return strings.HasPrefix(Provinciecode[i.Provinciecode], s) + return strings.HasPrefix(Provinciecode.GetValue(i.Provinciecode), s) } // match filters Provinciecode func FilterProvinciecodeMatch(i *Item, s string) bool { - return Provinciecode[i.Provinciecode] == s + return Provinciecode.GetValue(i.Provinciecode) == s } // getter Provinciecode func GettersProvinciecode(i *Item) string { - return Provinciecode[i.Provinciecode] + return Provinciecode.GetValue(i.Provinciecode) } // contain filter Provincienaam func FilterProvincienaamContains(i *Item, s string) bool { - return strings.Contains(Provincienaam[i.Provincienaam], s) + return strings.Contains(Provincienaam.GetValue(i.Provincienaam), s) } // startswith filter Provincienaam 
func FilterProvincienaamStartsWith(i *Item, s string) bool { - return strings.HasPrefix(Provincienaam[i.Provincienaam], s) + return strings.HasPrefix(Provincienaam.GetValue(i.Provincienaam), s) } // match filters Provincienaam func FilterProvincienaamMatch(i *Item, s string) bool { - return Provincienaam[i.Provincienaam] == s + return Provincienaam.GetValue(i.Provincienaam) == s } // getter Provincienaam func GettersProvincienaam(i *Item) string { - return Provincienaam[i.Provincienaam] + return Provincienaam.GetValue(i.Provincienaam) } // contain filter Point @@ -1026,22 +739,22 @@ func GettersPoint(i *Item) string { // contain filter PandGasEanAansluitingen func FilterPandGasEanAansluitingenContains(i *Item, s string) bool { - return strings.Contains(PandGasEanAansluitingen[i.PandGasEanAansluitingen], s) + return strings.Contains(PandGasEanAansluitingen.GetValue(i.PandGasEanAansluitingen), s) } // startswith filter PandGasEanAansluitingen func FilterPandGasEanAansluitingenStartsWith(i *Item, s string) bool { - return strings.HasPrefix(PandGasEanAansluitingen[i.PandGasEanAansluitingen], s) + return strings.HasPrefix(PandGasEanAansluitingen.GetValue(i.PandGasEanAansluitingen), s) } // match filters PandGasEanAansluitingen func FilterPandGasEanAansluitingenMatch(i *Item, s string) bool { - return PandGasEanAansluitingen[i.PandGasEanAansluitingen] == s + return PandGasEanAansluitingen.GetValue(i.PandGasEanAansluitingen) == s } // getter PandGasEanAansluitingen func GettersPandGasEanAansluitingen(i *Item) string { - return PandGasEanAansluitingen[i.PandGasEanAansluitingen] + return PandGasEanAansluitingen.GetValue(i.PandGasEanAansluitingen) } // contain filter GroupId2020 @@ -1066,62 +779,62 @@ func GettersGroupId2020(i *Item) string { // contain filter P6GasAansluitingen2020 func FilterP6GasAansluitingen2020Contains(i *Item, s string) bool { - return strings.Contains(P6GasAansluitingen2020[i.P6GasAansluitingen2020], s) + return 
strings.Contains(P6GasAansluitingen2020.GetValue(i.P6GasAansluitingen2020), s) } // startswith filter P6GasAansluitingen2020 func FilterP6GasAansluitingen2020StartsWith(i *Item, s string) bool { - return strings.HasPrefix(P6GasAansluitingen2020[i.P6GasAansluitingen2020], s) + return strings.HasPrefix(P6GasAansluitingen2020.GetValue(i.P6GasAansluitingen2020), s) } // match filters P6GasAansluitingen2020 func FilterP6GasAansluitingen2020Match(i *Item, s string) bool { - return P6GasAansluitingen2020[i.P6GasAansluitingen2020] == s + return P6GasAansluitingen2020.GetValue(i.P6GasAansluitingen2020) == s } // getter P6GasAansluitingen2020 func GettersP6GasAansluitingen2020(i *Item) string { - return P6GasAansluitingen2020[i.P6GasAansluitingen2020] + return P6GasAansluitingen2020.GetValue(i.P6GasAansluitingen2020) } // contain filter P6Gasm32020 func FilterP6Gasm32020Contains(i *Item, s string) bool { - return strings.Contains(P6Gasm32020[i.P6Gasm32020], s) + return strings.Contains(P6Gasm32020.GetValue(i.P6Gasm32020), s) } // startswith filter P6Gasm32020 func FilterP6Gasm32020StartsWith(i *Item, s string) bool { - return strings.HasPrefix(P6Gasm32020[i.P6Gasm32020], s) + return strings.HasPrefix(P6Gasm32020.GetValue(i.P6Gasm32020), s) } // match filters P6Gasm32020 func FilterP6Gasm32020Match(i *Item, s string) bool { - return P6Gasm32020[i.P6Gasm32020] == s + return P6Gasm32020.GetValue(i.P6Gasm32020) == s } // getter P6Gasm32020 func GettersP6Gasm32020(i *Item) string { - return P6Gasm32020[i.P6Gasm32020] + return P6Gasm32020.GetValue(i.P6Gasm32020) } // contain filter P6Kwh2020 func FilterP6Kwh2020Contains(i *Item, s string) bool { - return strings.Contains(P6Kwh2020[i.P6Kwh2020], s) + return strings.Contains(P6Kwh2020.GetValue(i.P6Kwh2020), s) } // startswith filter P6Kwh2020 func FilterP6Kwh2020StartsWith(i *Item, s string) bool { - return strings.HasPrefix(P6Kwh2020[i.P6Kwh2020], s) + return strings.HasPrefix(P6Kwh2020.GetValue(i.P6Kwh2020), s) } // match filters 
P6Kwh2020 func FilterP6Kwh2020Match(i *Item, s string) bool { - return P6Kwh2020[i.P6Kwh2020] == s + return P6Kwh2020.GetValue(i.P6Kwh2020) == s } // getter P6Kwh2020 func GettersP6Kwh2020(i *Item) string { - return P6Kwh2020[i.P6Kwh2020] + return P6Kwh2020.GetValue(i.P6Kwh2020) } // contain filter P6TotaalPandoppervlakM2 @@ -1146,48 +859,48 @@ func GettersP6TotaalPandoppervlakM2(i *Item) string { // contain filter PandBouwjaar func FilterPandBouwjaarContains(i *Item, s string) bool { - return strings.Contains(PandBouwjaar[i.PandBouwjaar], s) + return strings.Contains(PandBouwjaar.GetValue(i.PandBouwjaar), s) } // startswith filter PandBouwjaar func FilterPandBouwjaarStartsWith(i *Item, s string) bool { - return strings.HasPrefix(PandBouwjaar[i.PandBouwjaar], s) + return strings.HasPrefix(PandBouwjaar.GetValue(i.PandBouwjaar), s) } // match filters PandBouwjaar func FilterPandBouwjaarMatch(i *Item, s string) bool { - return PandBouwjaar[i.PandBouwjaar] == s + return PandBouwjaar.GetValue(i.PandBouwjaar) == s } // getter PandBouwjaar func GettersPandBouwjaar(i *Item) string { - return PandBouwjaar[i.PandBouwjaar] + return PandBouwjaar.GetValue(i.PandBouwjaar) } // contain filter PandGasAansluitingen func FilterPandGasAansluitingenContains(i *Item, s string) bool { - return strings.Contains(PandGasAansluitingen[i.PandGasAansluitingen], s) + return strings.Contains(PandGasAansluitingen.GetValue(i.PandGasAansluitingen), s) } // startswith filter PandGasAansluitingen func FilterPandGasAansluitingenStartsWith(i *Item, s string) bool { - return strings.HasPrefix(PandGasAansluitingen[i.PandGasAansluitingen], s) + return strings.HasPrefix(PandGasAansluitingen.GetValue(i.PandGasAansluitingen), s) } // match filters PandGasAansluitingen func FilterPandGasAansluitingenMatch(i *Item, s string) bool { - return PandGasAansluitingen[i.PandGasAansluitingen] == s + return PandGasAansluitingen.GetValue(i.PandGasAansluitingen) == s } // getter PandGasAansluitingen func 
GettersPandGasAansluitingen(i *Item) string { - return PandGasAansluitingen[i.PandGasAansluitingen] + return PandGasAansluitingen.GetValue(i.PandGasAansluitingen) } // contain filter Gebruiksdoelen func FilterGebruiksdoelenContains(i *Item, s string) bool { for _, v := range i.Gebruiksdoelen { - vs := Gebruiksdoelen[v] + vs := Gebruiksdoelen.GetValue(v) if strings.Contains(vs, s) { return true } @@ -1198,7 +911,7 @@ func FilterGebruiksdoelenContains(i *Item, s string) bool { // startswith filter Gebruiksdoelen func FilterGebruiksdoelenStartsWith(i *Item, s string) bool { for _, v := range i.Gebruiksdoelen { - vs := Gebruiksdoelen[v] + vs := Gebruiksdoelen.GetValue(v) if strings.HasPrefix(vs, s) { return true } @@ -1210,7 +923,7 @@ func FilterGebruiksdoelenStartsWith(i *Item, s string) bool { // match filters Gebruiksdoelen func FilterGebruiksdoelenMatch(i *Item, s string) bool { for _, v := range i.Gebruiksdoelen { - vs := Gebruiksdoelen[v] + vs := Gebruiksdoelen.GetValue(v) if vs == s { return true } @@ -1222,7 +935,7 @@ func FilterGebruiksdoelenMatch(i *Item, s string) bool { func GettersGebruiksdoelen(i *Item) string { doelen := make([]string, 0) for _, v := range i.Gebruiksdoelen { - vs := Gebruiksdoelen[v] + vs := Gebruiksdoelen.GetValue(v) doelen = append(doelen, vs) } return strings.Join(doelen, ", ") @@ -1232,7 +945,7 @@ func GettersGebruiksdoelen(i *Item) string { func GroupByGettersGebruiksdoelen(item *Item, grouping ItemsGroupedBy) { for i := range item.Gebruiksdoelen { - groupkey := Gebruiksdoelen[item.Gebruiksdoelen[i]] + groupkey := Gebruiksdoelen.GetValue(item.Gebruiksdoelen[i]) grouping[groupkey] = append(grouping[groupkey], item) } } @@ -1298,139 +1011,6 @@ func validateRegisters() error { return nil } -// GetBitArrayWoningType for given v string see if there is -// a bitarray created. 
-func GetBitArrayWoningType(v string) (bitarray.BitArray, error) { - - bpi, ok := WoningTypeIdxMap[v] - - if !ok { - return nil, errors.New("no bitarray filter found for column value WoningType") - } - - ba, ok := WoningTypeItems[bpi] - - if !ok { - return nil, errors.New("no bitarray filter found for column idx value WoningType") - } - - return ba, nil -} - -// GetBitArrayLabelscoreVoorlopig for given v string see if there is -// a bitarray created. -func GetBitArrayLabelscoreVoorlopig(v string) (bitarray.BitArray, error) { - - bpi, ok := LabelscoreVoorlopigIdxMap[v] - - if !ok { - return nil, errors.New("no bitarray filter found for column value LabelscoreVoorlopig") - } - - ba, ok := LabelscoreVoorlopigItems[bpi] - - if !ok { - return nil, errors.New("no bitarray filter found for column idx value LabelscoreVoorlopig") - } - - return ba, nil -} - -// GetBitArrayLabelscoreDefinitief for given v string see if there is -// a bitarray created. -func GetBitArrayLabelscoreDefinitief(v string) (bitarray.BitArray, error) { - - bpi, ok := LabelscoreDefinitiefIdxMap[v] - - if !ok { - return nil, errors.New("no bitarray filter found for column value LabelscoreDefinitief") - } - - ba, ok := LabelscoreDefinitiefItems[bpi] - - if !ok { - return nil, errors.New("no bitarray filter found for column idx value LabelscoreDefinitief") - } - - return ba, nil -} - -// GetBitArrayGemeentecode for given v string see if there is -// a bitarray created. -func GetBitArrayGemeentecode(v string) (bitarray.BitArray, error) { - - bpi, ok := GemeentecodeIdxMap[v] - - if !ok { - return nil, errors.New("no bitarray filter found for column value Gemeentecode") - } - - ba, ok := GemeentecodeItems[bpi] - - if !ok { - return nil, errors.New("no bitarray filter found for column idx value Gemeentecode") - } - - return ba, nil -} - -// GetBitArrayBuurtcode for given v string see if there is -// a bitarray created. 
-func GetBitArrayBuurtcode(v string) (bitarray.BitArray, error) { - - bpi, ok := BuurtcodeIdxMap[v] - - if !ok { - return nil, errors.New("no bitarray filter found for column value Buurtcode") - } - - ba, ok := BuurtcodeItems[bpi] - - if !ok { - return nil, errors.New("no bitarray filter found for column idx value Buurtcode") - } - - return ba, nil -} - -// GetBitArrayWijkcode for given v string see if there is -// a bitarray created. -func GetBitArrayWijkcode(v string) (bitarray.BitArray, error) { - - bpi, ok := WijkcodeIdxMap[v] - - if !ok { - return nil, errors.New("no bitarray filter found for column value Wijkcode") - } - - ba, ok := WijkcodeItems[bpi] - - if !ok { - return nil, errors.New("no bitarray filter found for column idx value Wijkcode") - } - - return ba, nil -} - -// GetBitArrayProvinciecode for given v string see if there is -// a bitarray created. -func GetBitArrayProvinciecode(v string) (bitarray.BitArray, error) { - - bpi, ok := ProvinciecodeIdxMap[v] - - if !ok { - return nil, errors.New("no bitarray filter found for column value Provinciecode") - } - - ba, ok := ProvinciecodeItems[bpi] - - if !ok { - return nil, errors.New("no bitarray filter found for column idx value Provinciecode") - } - - return ba, nil -} - func init() { RegisterFuncMap = make(registerFuncType) @@ -1504,8 +1084,6 @@ func init() { RegisterGetters["woning_type"] = GettersWoningType RegisterGroupBy["woning_type"] = GettersWoningType - RegisterBitArray["woning_type"] = GetBitArrayWoningType - //register filters for LabelscoreVoorlopig RegisterFuncMap["match-labelscore_voorlopig"] = FilterLabelscoreVoorlopigMatch RegisterFuncMap["contains-labelscore_voorlopig"] = FilterLabelscoreVoorlopigContains @@ -1513,8 +1091,6 @@ func init() { RegisterGetters["labelscore_voorlopig"] = GettersLabelscoreVoorlopig RegisterGroupBy["labelscore_voorlopig"] = GettersLabelscoreVoorlopig - RegisterBitArray["labelscore_voorlopig"] = GetBitArrayLabelscoreVoorlopig - //register filters for 
LabelscoreDefinitief RegisterFuncMap["match-labelscore_definitief"] = FilterLabelscoreDefinitiefMatch RegisterFuncMap["contains-labelscore_definitief"] = FilterLabelscoreDefinitiefContains @@ -1522,8 +1098,6 @@ func init() { RegisterGetters["labelscore_definitief"] = GettersLabelscoreDefinitief RegisterGroupBy["labelscore_definitief"] = GettersLabelscoreDefinitief - RegisterBitArray["labelscore_definitief"] = GetBitArrayLabelscoreDefinitief - //register filters for Gemeentecode RegisterFuncMap["match-gemeentecode"] = FilterGemeentecodeMatch RegisterFuncMap["contains-gemeentecode"] = FilterGemeentecodeContains @@ -1531,8 +1105,6 @@ func init() { RegisterGetters["gemeentecode"] = GettersGemeentecode RegisterGroupBy["gemeentecode"] = GettersGemeentecode - RegisterBitArray["gemeentecode"] = GetBitArrayGemeentecode - //register filters for Gemeentenaam RegisterFuncMap["match-gemeentenaam"] = FilterGemeentenaamMatch RegisterFuncMap["contains-gemeentenaam"] = FilterGemeentenaamContains @@ -1547,8 +1119,6 @@ func init() { RegisterGetters["buurtcode"] = GettersBuurtcode RegisterGroupBy["buurtcode"] = GettersBuurtcode - RegisterBitArray["buurtcode"] = GetBitArrayBuurtcode - //register filters for Buurtnaam RegisterFuncMap["match-buurtnaam"] = FilterBuurtnaamMatch RegisterFuncMap["contains-buurtnaam"] = FilterBuurtnaamContains @@ -1563,8 +1133,6 @@ func init() { RegisterGetters["wijkcode"] = GettersWijkcode RegisterGroupBy["wijkcode"] = GettersWijkcode - RegisterBitArray["wijkcode"] = GetBitArrayWijkcode - //register filters for Wijknaam RegisterFuncMap["match-wijknaam"] = FilterWijknaamMatch RegisterFuncMap["contains-wijknaam"] = FilterWijknaamContains @@ -1579,8 +1147,6 @@ func init() { RegisterGetters["provinciecode"] = GettersProvinciecode RegisterGroupBy["provinciecode"] = GettersProvinciecode - RegisterBitArray["provinciecode"] = GetBitArrayProvinciecode - //register filters for Provincienaam RegisterFuncMap["match-provincienaam"] = FilterProvincienaamMatch 
RegisterFuncMap["contains-provincienaam"] = FilterProvincienaamContains @@ -1659,6 +1225,7 @@ func init() { RegisterGroupBy["gebruiksdoelen"] = GettersGebruiksdoelen validateRegisters() + /* RegisterFuncMap["match-ekey"] = FilterEkeyMatch RegisterFuncMap["contains-ekey"] = FilterEkeyContains @@ -1703,119 +1270,27 @@ func createSort(items Items) sortLookup { "adres": func(i, j int) bool { return items[i].Adres < items[j].Adres }, "-adres": func(i, j int) bool { return items[i].Adres > items[j].Adres }, - "woning_type": func(i, j int) bool { return WoningType[items[i].WoningType] < WoningType[items[j].WoningType] }, - "-woning_type": func(i, j int) bool { return WoningType[items[i].WoningType] > WoningType[items[j].WoningType] }, - - "labelscore_voorlopig": func(i, j int) bool { - return LabelscoreVoorlopig[items[i].LabelscoreVoorlopig] < LabelscoreVoorlopig[items[j].LabelscoreVoorlopig] - }, - "-labelscore_voorlopig": func(i, j int) bool { - return LabelscoreVoorlopig[items[i].LabelscoreVoorlopig] > LabelscoreVoorlopig[items[j].LabelscoreVoorlopig] - }, - - "labelscore_definitief": func(i, j int) bool { - return LabelscoreDefinitief[items[i].LabelscoreDefinitief] < LabelscoreDefinitief[items[j].LabelscoreDefinitief] - }, - "-labelscore_definitief": func(i, j int) bool { - return LabelscoreDefinitief[items[i].LabelscoreDefinitief] > LabelscoreDefinitief[items[j].LabelscoreDefinitief] - }, - - "gemeentecode": func(i, j int) bool { return Gemeentecode[items[i].Gemeentecode] < Gemeentecode[items[j].Gemeentecode] }, - "-gemeentecode": func(i, j int) bool { return Gemeentecode[items[i].Gemeentecode] > Gemeentecode[items[j].Gemeentecode] }, - - "gemeentenaam": func(i, j int) bool { return Gemeentenaam[items[i].Gemeentenaam] < Gemeentenaam[items[j].Gemeentenaam] }, - "-gemeentenaam": func(i, j int) bool { return Gemeentenaam[items[i].Gemeentenaam] > Gemeentenaam[items[j].Gemeentenaam] }, - - "buurtcode": func(i, j int) bool { return Buurtcode[items[i].Buurtcode] < 
Buurtcode[items[j].Buurtcode] }, - "-buurtcode": func(i, j int) bool { return Buurtcode[items[i].Buurtcode] > Buurtcode[items[j].Buurtcode] }, - - "buurtnaam": func(i, j int) bool { return Buurtnaam[items[i].Buurtnaam] < Buurtnaam[items[j].Buurtnaam] }, - "-buurtnaam": func(i, j int) bool { return Buurtnaam[items[i].Buurtnaam] > Buurtnaam[items[j].Buurtnaam] }, - - "wijkcode": func(i, j int) bool { return Wijkcode[items[i].Wijkcode] < Wijkcode[items[j].Wijkcode] }, - "-wijkcode": func(i, j int) bool { return Wijkcode[items[i].Wijkcode] > Wijkcode[items[j].Wijkcode] }, - - "wijknaam": func(i, j int) bool { return Wijknaam[items[i].Wijknaam] < Wijknaam[items[j].Wijknaam] }, - "-wijknaam": func(i, j int) bool { return Wijknaam[items[i].Wijknaam] > Wijknaam[items[j].Wijknaam] }, - - "provinciecode": func(i, j int) bool { - return Provinciecode[items[i].Provinciecode] < Provinciecode[items[j].Provinciecode] - }, - "-provinciecode": func(i, j int) bool { - return Provinciecode[items[i].Provinciecode] > Provinciecode[items[j].Provinciecode] - }, - - "provincienaam": func(i, j int) bool { - return Provincienaam[items[i].Provincienaam] < Provincienaam[items[j].Provincienaam] + "woning_type": func(i, j int) bool { + return WoningType.GetValue(items[i].WoningType) < WoningType.GetValue(items[j].WoningType) }, - "-provincienaam": func(i, j int) bool { - return Provincienaam[items[i].Provincienaam] > Provincienaam[items[j].Provincienaam] + "-woning_type": func(i, j int) bool { + return WoningType.GetValue(items[i].WoningType) > WoningType.GetValue(items[j].WoningType) }, "point": func(i, j int) bool { return items[i].Point < items[j].Point }, "-point": func(i, j int) bool { return items[i].Point > items[j].Point }, - - "pand_gas_ean_aansluitingen": func(i, j int) bool { - return PandGasEanAansluitingen[items[i].PandGasEanAansluitingen] < PandGasEanAansluitingen[items[j].PandGasEanAansluitingen] - }, - "-pand_gas_ean_aansluitingen": func(i, j int) bool { - return 
PandGasEanAansluitingen[items[i].PandGasEanAansluitingen] > PandGasEanAansluitingen[items[j].PandGasEanAansluitingen] - }, - - "group_id_2020": func(i, j int) bool { return items[i].GroupId2020 < items[j].GroupId2020 }, - "-group_id_2020": func(i, j int) bool { return items[i].GroupId2020 > items[j].GroupId2020 }, - - "p6_gas_aansluitingen_2020": func(i, j int) bool { - return P6GasAansluitingen2020[items[i].P6GasAansluitingen2020] < P6GasAansluitingen2020[items[j].P6GasAansluitingen2020] - }, - "-p6_gas_aansluitingen_2020": func(i, j int) bool { - return P6GasAansluitingen2020[items[i].P6GasAansluitingen2020] > P6GasAansluitingen2020[items[j].P6GasAansluitingen2020] - }, - - "p6_gasm3_2020": func(i, j int) bool { return P6Gasm32020[items[i].P6Gasm32020] < P6Gasm32020[items[j].P6Gasm32020] }, - "-p6_gasm3_2020": func(i, j int) bool { return P6Gasm32020[items[i].P6Gasm32020] > P6Gasm32020[items[j].P6Gasm32020] }, - - "p6_kwh_2020": func(i, j int) bool { return P6Kwh2020[items[i].P6Kwh2020] < P6Kwh2020[items[j].P6Kwh2020] }, - "-p6_kwh_2020": func(i, j int) bool { return P6Kwh2020[items[i].P6Kwh2020] > P6Kwh2020[items[j].P6Kwh2020] }, - - "p6_totaal_pandoppervlak_m2": func(i, j int) bool { return items[i].P6TotaalPandoppervlakM2 < items[j].P6TotaalPandoppervlakM2 }, - "-p6_totaal_pandoppervlak_m2": func(i, j int) bool { return items[i].P6TotaalPandoppervlakM2 > items[j].P6TotaalPandoppervlakM2 }, - - "pand_bouwjaar": func(i, j int) bool { return PandBouwjaar[items[i].PandBouwjaar] < PandBouwjaar[items[j].PandBouwjaar] }, - "-pand_bouwjaar": func(i, j int) bool { return PandBouwjaar[items[i].PandBouwjaar] > PandBouwjaar[items[j].PandBouwjaar] }, - - "pand_gas_aansluitingen": func(i, j int) bool { - return PandGasAansluitingen[items[i].PandGasAansluitingen] < PandGasAansluitingen[items[j].PandGasAansluitingen] - }, - "-pand_gas_aansluitingen": func(i, j int) bool { - return PandGasAansluitingen[items[i].PandGasAansluitingen] > 
PandGasAansluitingen[items[j].PandGasAansluitingen] - }, - - "gebruiksdoelen": func(i, j int) bool { - return GettersGebruiksdoelen(items[i]) < GettersGebruiksdoelen(items[j]) - }, - "-gebruiksdoelen": func(i, j int) bool { - return GettersGebruiksdoelen(items[i]) > GettersGebruiksdoelen(items[j]) - }, - - /* - "ekey": func(i, j int) bool { return items[i].Ekey < items[j].Ekey }, - "-ekey": func(i, j int) bool { return items[i].Ekey > items[j].Ekey }, - */ } return sortFuncs } func sortBy(items Items, sortingL []string) (Items, []string) { - - lock.Lock() - defer lock.Unlock() - sortFuncs := createSort(items) for _, sortFuncName := range sortingL { - sortFunc := sortFuncs[sortFuncName] - sort.Slice(items, sortFunc) + sortFunc, ok := sortFuncs[sortFuncName] + if ok { + sort.Slice(items, sortFunc) + } } // TODO must be nicer way diff --git a/model_maps.go b/model_maps.go index 1999365..3dba615 100644 --- a/model_maps.go +++ b/model_maps.go @@ -1,444 +1,129 @@ /* - - When transforming ItemsIn to Items and back again to ItemsOut - - maps are needed to store lookup values. - - those are generated here. - + Transforming ItemsIn -> Items -> ItemsOut + Where Items has column values ar integers to save memmory + maps are needed to restore integers back to the actual values. + those are generated and stored here. 
*/ - package main -import ( - "sync" -) +import () type ModelMaps struct { - WoningTypeTracker uint16 - WoningTypeIdxMap fieldIdxMap - WoningType fieldMapIdx - - LabelscoreVoorlopigTracker uint16 - LabelscoreVoorlopigIdxMap fieldIdxMap - LabelscoreVoorlopig fieldMapIdx - - // LabelscoreVoorlopigItems fieldItemsMap - - LabelscoreDefinitiefTracker uint16 - LabelscoreDefinitiefIdxMap fieldIdxMap - LabelscoreDefinitief fieldMapIdx - - // LabelscoreDefinitiefItems fieldItemsMap - - GemeentecodeTracker uint16 - GemeentecodeIdxMap fieldIdxMap - Gemeentecode fieldMapIdx - - // GemeentecodeItems fieldItemsMap - - GemeentenaamTracker uint16 - GemeentenaamIdxMap fieldIdxMap - Gemeentenaam fieldMapIdx - - BuurtcodeTracker uint16 - BuurtcodeIdxMap fieldIdxMap - Buurtcode fieldMapIdx - - // BuurtcodeItems fieldItemsMap - - BuurtnaamTracker uint16 - BuurtnaamIdxMap fieldIdxMap - Buurtnaam fieldMapIdx - - WijkcodeTracker uint16 - WijkcodeIdxMap fieldIdxMap - Wijkcode fieldMapIdx - - // WijkcodeItems fieldItemsMap - - WijknaamTracker uint16 - WijknaamIdxMap fieldIdxMap - Wijknaam fieldMapIdx - - ProvinciecodeTracker uint16 - ProvinciecodeIdxMap fieldIdxMap - Provinciecode fieldMapIdx - - // ProvinciecodeItems fieldItemsMap - - ProvincienaamTracker uint16 - ProvincienaamIdxMap fieldIdxMap - Provincienaam fieldMapIdx - - PandGasEanAansluitingenTracker uint16 - PandGasEanAansluitingenIdxMap fieldIdxMap - PandGasEanAansluitingen fieldMapIdx - - P6GasAansluitingen2020Tracker uint16 - P6GasAansluitingen2020IdxMap fieldIdxMap - P6GasAansluitingen2020 fieldMapIdx - - P6Gasm32020Tracker uint16 - P6Gasm32020IdxMap fieldIdxMap - P6Gasm32020 fieldMapIdx - - P6Kwh2020Tracker uint16 - P6Kwh2020IdxMap fieldIdxMap - P6Kwh2020 fieldMapIdx - - PandBouwjaarTracker uint16 - PandBouwjaarIdxMap fieldIdxMap - PandBouwjaar fieldMapIdx - - PandGasAansluitingenTracker uint16 - PandGasAansluitingenIdxMap fieldIdxMap - PandGasAansluitingen fieldMapIdx - - GebruiksdoelenTracker uint16 - GebruiksdoelenIdxMap 
fieldIdxMap - Gebruiksdoelen fieldMapIdx + WoningType MappedColumn + LabelscoreVoorlopig MappedColumn + LabelscoreDefinitief MappedColumn + Gemeentecode MappedColumn + Gemeentenaam MappedColumn + Buurtcode MappedColumn + Buurtnaam MappedColumn + Wijkcode MappedColumn + Wijknaam MappedColumn + Provinciecode MappedColumn + Provincienaam MappedColumn + PandGasEanAansluitingen MappedColumn + P6GasAansluitingen2020 MappedColumn + P6Gasm32020 MappedColumn + P6Kwh2020 MappedColumn + PandBouwjaar MappedColumn + PandGasAansluitingen MappedColumn + Gebruiksdoelen MappedColumn } +var modelmaps2 map[string]MappedColumn + // Column maps. // Store for each non distinct/repeated column -// unit16 -> string map and -// string -> unit16 map -// track count of distinct values - -var WoningTypeTracker uint16 -var WoningTypeIdxMap fieldIdxMap -var WoningType fieldMapIdx - -var WoningTypeItems fieldItemsMap - -var LabelscoreVoorlopigTracker uint16 -var LabelscoreVoorlopigIdxMap fieldIdxMap -var LabelscoreVoorlopig fieldMapIdx - -var LabelscoreVoorlopigItems fieldItemsMap - -var LabelscoreDefinitiefTracker uint16 -var LabelscoreDefinitiefIdxMap fieldIdxMap -var LabelscoreDefinitief fieldMapIdx - -var LabelscoreDefinitiefItems fieldItemsMap - -var GemeentecodeTracker uint16 -var GemeentecodeIdxMap fieldIdxMap -var Gemeentecode fieldMapIdx - -var GemeentecodeItems fieldItemsMap - -var GemeentenaamTracker uint16 -var GemeentenaamIdxMap fieldIdxMap -var Gemeentenaam fieldMapIdx - -var BuurtcodeTracker uint16 -var BuurtcodeIdxMap fieldIdxMap -var Buurtcode fieldMapIdx - -var BuurtcodeItems fieldItemsMap - -var BuurtnaamTracker uint16 -var BuurtnaamIdxMap fieldIdxMap -var Buurtnaam fieldMapIdx - -var WijkcodeTracker uint16 -var WijkcodeIdxMap fieldIdxMap -var Wijkcode fieldMapIdx - -var WijkcodeItems fieldItemsMap - -var WijknaamTracker uint16 -var WijknaamIdxMap fieldIdxMap -var Wijknaam fieldMapIdx - -var ProvinciecodeTracker uint16 -var ProvinciecodeIdxMap fieldIdxMap -var Provinciecode 
fieldMapIdx - -var ProvinciecodeItems fieldItemsMap - -var ProvincienaamTracker uint16 -var ProvincienaamIdxMap fieldIdxMap -var Provincienaam fieldMapIdx - -var PandGasEanAansluitingenTracker uint16 -var PandGasEanAansluitingenIdxMap fieldIdxMap -var PandGasEanAansluitingen fieldMapIdx - -var P6GasAansluitingen2020Tracker uint16 -var P6GasAansluitingen2020IdxMap fieldIdxMap -var P6GasAansluitingen2020 fieldMapIdx - -var P6Gasm32020Tracker uint16 -var P6Gasm32020IdxMap fieldIdxMap -var P6Gasm32020 fieldMapIdx - -var P6Kwh2020Tracker uint16 -var P6Kwh2020IdxMap fieldIdxMap -var P6Kwh2020 fieldMapIdx - -var PandBouwjaarTracker uint16 -var PandBouwjaarIdxMap fieldIdxMap -var PandBouwjaar fieldMapIdx - -var PandGasAansluitingenTracker uint16 -var PandGasAansluitingenIdxMap fieldIdxMap -var PandGasAansluitingen fieldMapIdx - -var GebruiksdoelenTracker uint16 -var GebruiksdoelenIdxMap fieldIdxMap -var Gebruiksdoelen fieldMapIdx - -/* -var {columnname}Tracker uint16 -var {columnname}IdxMap fieldIdxMap -var {columnname} fieldMapIdx -var {columnname}Items fieldItemmap -*/ -// item map lock -var lock = sync.RWMutex{} - -// bitArray Lock -var balock = sync.RWMutex{} - -func initBitarrays() { - - WoningTypeItems = make(fieldItemsMap) - LabelscoreVoorlopigItems = make(fieldItemsMap) - LabelscoreDefinitiefItems = make(fieldItemsMap) - GemeentecodeItems = make(fieldItemsMap) - BuurtcodeItems = make(fieldItemsMap) - WijkcodeItems = make(fieldItemsMap) +var BitArrays map[string]fieldBitarrayMap + +var WoningType MappedColumn +var LabelscoreVoorlopig MappedColumn +var Gemeentecode MappedColumn +var LabelscoreDefinitief MappedColumn +var Gemeentenaam MappedColumn +var Buurtcode MappedColumn +var Buurtnaam MappedColumn +var Provinciecode MappedColumn +var Wijkcode MappedColumn +var Wijknaam MappedColumn +var Provincienaam MappedColumn +var PandGasEanAansluitingen MappedColumn +var P6GasAansluitingen2020 MappedColumn +var P6Gasm32020 MappedColumn +var P6Kwh2020 MappedColumn +var 
PandBouwjaar MappedColumn +var PandGasAansluitingen MappedColumn +var Gebruiksdoelen MappedColumn + +func clearBitArrays() { + BitArrays = make(map[string]fieldBitarrayMap) } -func setUpMaps() { - initBitarrays() - WoningTypeTracker = 0 - WoningTypeIdxMap = make(fieldIdxMap) - WoningType = make(fieldMapIdx) - - LabelscoreVoorlopigTracker = 0 - LabelscoreVoorlopigIdxMap = make(fieldIdxMap) - LabelscoreVoorlopig = make(fieldMapIdx) - - LabelscoreDefinitiefTracker = 0 - LabelscoreDefinitiefIdxMap = make(fieldIdxMap) - LabelscoreDefinitief = make(fieldMapIdx) - - GemeentecodeTracker = 0 - GemeentecodeIdxMap = make(fieldIdxMap) - Gemeentecode = make(fieldMapIdx) - - GemeentenaamTracker = 0 - GemeentenaamIdxMap = make(fieldIdxMap) - Gemeentenaam = make(fieldMapIdx) - - BuurtcodeTracker = 0 - BuurtcodeIdxMap = make(fieldIdxMap) - Buurtcode = make(fieldMapIdx) - - BuurtnaamTracker = 0 - BuurtnaamIdxMap = make(fieldIdxMap) - Buurtnaam = make(fieldMapIdx) - - WijkcodeTracker = 0 - WijkcodeIdxMap = make(fieldIdxMap) - Wijkcode = make(fieldMapIdx) - - WijknaamTracker = 0 - WijknaamIdxMap = make(fieldIdxMap) - Wijknaam = make(fieldMapIdx) - - ProvinciecodeTracker = 0 - ProvinciecodeIdxMap = make(fieldIdxMap) - Provinciecode = make(fieldMapIdx) - - ProvinciecodeItems = make(fieldItemsMap) - - ProvincienaamTracker = 0 - ProvincienaamIdxMap = make(fieldIdxMap) - Provincienaam = make(fieldMapIdx) - - PandGasEanAansluitingenTracker = 0 - PandGasEanAansluitingenIdxMap = make(fieldIdxMap) - PandGasEanAansluitingen = make(fieldMapIdx) - - P6GasAansluitingen2020Tracker = 0 - P6GasAansluitingen2020IdxMap = make(fieldIdxMap) - P6GasAansluitingen2020 = make(fieldMapIdx) - - P6Gasm32020Tracker = 0 - P6Gasm32020IdxMap = make(fieldIdxMap) - P6Gasm32020 = make(fieldMapIdx) - - P6Kwh2020Tracker = 0 - P6Kwh2020IdxMap = make(fieldIdxMap) - P6Kwh2020 = make(fieldMapIdx) - - PandBouwjaarTracker = 0 - PandBouwjaarIdxMap = make(fieldIdxMap) - PandBouwjaar = make(fieldMapIdx) - - 
PandGasAansluitingenTracker = 0 - PandGasAansluitingenIdxMap = make(fieldIdxMap) - PandGasAansluitingen = make(fieldMapIdx) +func init() { + clearBitArrays() +} - GebruiksdoelenTracker = 0 - GebruiksdoelenIdxMap = make(fieldIdxMap) - Gebruiksdoelen = make(fieldMapIdx) +func setUpRepeatedColumns() { + WoningType = NewReapeatedColumn("woning_type") + LabelscoreVoorlopig = NewReapeatedColumn("labelscore_voorlopig") + LabelscoreDefinitief = NewReapeatedColumn("labelscore_definitief") + Gemeentecode = NewReapeatedColumn("gemeentecode") + Gemeentenaam = NewReapeatedColumn("gemeentenaam") + Buurtcode = NewReapeatedColumn("buurtcode") + Buurtnaam = NewReapeatedColumn("buurtnaam") + Wijkcode = NewReapeatedColumn("wijkcode") + Wijknaam = NewReapeatedColumn("wijknaam") + Provinciecode = NewReapeatedColumn("provinciecode") + Provincienaam = NewReapeatedColumn("provincienaam") + PandGasEanAansluitingen = NewReapeatedColumn("pand_gas_ean_aansluitingen") + P6GasAansluitingen2020 = NewReapeatedColumn("p6_gas_aansluitingen_2020") + P6Gasm32020 = NewReapeatedColumn("p6_gasm3_2020") + P6Kwh2020 = NewReapeatedColumn("p6_kwh_2020") + PandBouwjaar = NewReapeatedColumn("pand_bouwjaar") + PandGasAansluitingen = NewReapeatedColumn("pand_gas_aansluitingen") + Gebruiksdoelen = NewReapeatedColumn("gebruiksdoelen") } func CreateMapstore() ModelMaps { return ModelMaps{ - WoningTypeTracker, - WoningTypeIdxMap, WoningType, - - LabelscoreVoorlopigTracker, - LabelscoreVoorlopigIdxMap, LabelscoreVoorlopig, - - LabelscoreDefinitiefTracker, - LabelscoreDefinitiefIdxMap, LabelscoreDefinitief, - - GemeentecodeTracker, - GemeentecodeIdxMap, Gemeentecode, - - GemeentenaamTracker, - GemeentenaamIdxMap, Gemeentenaam, - - BuurtcodeTracker, - BuurtcodeIdxMap, Buurtcode, - - BuurtnaamTracker, - BuurtnaamIdxMap, Buurtnaam, - - WijkcodeTracker, - WijkcodeIdxMap, Wijkcode, - - WijknaamTracker, - WijknaamIdxMap, Wijknaam, - - ProvinciecodeTracker, - ProvinciecodeIdxMap, Provinciecode, - - ProvincienaamTracker, - 
ProvincienaamIdxMap, Provincienaam, - - PandGasEanAansluitingenTracker, - PandGasEanAansluitingenIdxMap, PandGasEanAansluitingen, - - P6GasAansluitingen2020Tracker, - P6GasAansluitingen2020IdxMap, P6GasAansluitingen2020, - - P6Gasm32020Tracker, - P6Gasm32020IdxMap, P6Gasm32020, - - P6Kwh2020Tracker, - P6Kwh2020IdxMap, P6Kwh2020, - - PandBouwjaarTracker, - PandBouwjaarIdxMap, PandBouwjaar, - - PandGasAansluitingenTracker, - PandGasAansluitingenIdxMap, PandGasAansluitingen, - - GebruiksdoelenTracker, - GebruiksdoelenIdxMap, Gebruiksdoelen, } } func LoadMapstore(m ModelMaps) { - - WoningTypeTracker = m.WoningTypeTracker - WoningTypeIdxMap = m.WoningTypeIdxMap WoningType = m.WoningType - - LabelscoreVoorlopigTracker = m.LabelscoreVoorlopigTracker - LabelscoreVoorlopigIdxMap = m.LabelscoreVoorlopigIdxMap LabelscoreVoorlopig = m.LabelscoreVoorlopig - - LabelscoreDefinitiefTracker = m.LabelscoreDefinitiefTracker - LabelscoreDefinitiefIdxMap = m.LabelscoreDefinitiefIdxMap LabelscoreDefinitief = m.LabelscoreDefinitief - - GemeentecodeTracker = m.GemeentecodeTracker - GemeentecodeIdxMap = m.GemeentecodeIdxMap Gemeentecode = m.Gemeentecode - - GemeentenaamTracker = m.GemeentenaamTracker - GemeentenaamIdxMap = m.GemeentenaamIdxMap Gemeentenaam = m.Gemeentenaam - - BuurtcodeTracker = m.BuurtcodeTracker - BuurtcodeIdxMap = m.BuurtcodeIdxMap Buurtcode = m.Buurtcode - - BuurtnaamTracker = m.BuurtnaamTracker - BuurtnaamIdxMap = m.BuurtnaamIdxMap Buurtnaam = m.Buurtnaam - - WijkcodeTracker = m.WijkcodeTracker - WijkcodeIdxMap = m.WijkcodeIdxMap Wijkcode = m.Wijkcode - - WijknaamTracker = m.WijknaamTracker - WijknaamIdxMap = m.WijknaamIdxMap Wijknaam = m.Wijknaam - - ProvinciecodeTracker = m.ProvinciecodeTracker - ProvinciecodeIdxMap = m.ProvinciecodeIdxMap Provinciecode = m.Provinciecode - - ProvincienaamTracker = m.ProvincienaamTracker - ProvincienaamIdxMap = m.ProvincienaamIdxMap Provincienaam = m.Provincienaam - - PandGasEanAansluitingenTracker = m.PandGasEanAansluitingenTracker 
- PandGasEanAansluitingenIdxMap = m.PandGasEanAansluitingenIdxMap PandGasEanAansluitingen = m.PandGasEanAansluitingen - - P6GasAansluitingen2020Tracker = m.P6GasAansluitingen2020Tracker - P6GasAansluitingen2020IdxMap = m.P6GasAansluitingen2020IdxMap P6GasAansluitingen2020 = m.P6GasAansluitingen2020 - - P6Gasm32020Tracker = m.P6Gasm32020Tracker - P6Gasm32020IdxMap = m.P6Gasm32020IdxMap P6Gasm32020 = m.P6Gasm32020 - - P6Kwh2020Tracker = m.P6Kwh2020Tracker - P6Kwh2020IdxMap = m.P6Kwh2020IdxMap P6Kwh2020 = m.P6Kwh2020 - - PandBouwjaarTracker = m.PandBouwjaarTracker - PandBouwjaarIdxMap = m.PandBouwjaarIdxMap PandBouwjaar = m.PandBouwjaar - - PandGasAansluitingenTracker = m.PandGasAansluitingenTracker - PandGasAansluitingenIdxMap = m.PandGasEanAansluitingenIdxMap PandGasAansluitingen = m.PandGasAansluitingen - - GebruiksdoelenTracker = m.GebruiksdoelenTracker - GebruiksdoelenIdxMap = m.GebruiksdoelenIdxMap Gebruiksdoelen = m.Gebruiksdoelen } diff --git a/operations.go b/operations.go index 6d6a4a5..b48ccbc 100644 --- a/operations.go +++ b/operations.go @@ -41,10 +41,9 @@ type formatRespFunc func(w http.ResponseWriter, r *http.Request, items Items) type registerFormatMap map[string]formatRespFunc type Query struct { - Filters filterType - Excludes filterType - Anys filterType - BitArrays filterType + Filters filterType + Excludes filterType + Anys filterType GroupBy string Reduce string @@ -257,9 +256,6 @@ func groupByRunner(items Items, groupByParameter string) ItemsGroupedBy { return grouping } - lock.RLock() - defer lock.RUnlock() - for _, item := range items { if customGrouping == nil { GroupingKey := groupingFunc(item) @@ -348,13 +344,11 @@ func filteredEarlyExit(items *Items, operations GroupedOperations, query Query) start := (query.Page - 1) * query.PageSize end := start + query.PageSize stop := end + if query.LimitGiven { stop = limit } - lock.RLock() - defer lock.RUnlock() - for _, item := range *items { if !any(item, anys, registerFuncs) { continue @@ -386,14 
+380,13 @@ func filteredEarlyExitSingle(items *Items, column string, operations GroupedOper start := (query.Page - 1) * query.PageSize end := start + query.PageSize stop := end + if query.LimitGiven { stop = limit } - lock.RLock() - defer lock.RUnlock() - for _, item := range *items { + if !any(item, anys, registerFuncs) { continue } @@ -418,7 +411,9 @@ func filteredEarlyExitSingle(items *Items, column string, operations GroupedOper break } } + results := []string{} + for k := range filteredItemsSet { // empty keys crashes frontend. // should be fixed in frontend then below can go. @@ -435,18 +430,11 @@ func filteredEarlyExitSingle(items *Items, column string, operations GroupedOper // to do fast bitwise operations. func bitArrayFilter( items *Items, - operations GroupedOperations, query Query) (Items, error) { - balock.RLock() - defer balock.RUnlock() - - lock.RLock() - defer lock.RUnlock() - combinedBitArrays := make([]bitarray.BitArray, 0) - for k := range operations.BitArrays { + for k := range BitArrays { parameter, foundkey := query.Filters["match-"+k] if len(parameter) == 0 { @@ -455,8 +443,11 @@ func bitArrayFilter( if !foundkey { continue } - ba, err := operations.BitArrays[k](parameter[0]) + + ba, err := GetBitArray(k, parameter[0]) + if err != nil { + fmt.Println(err) continue } combinedBitArrays = append(combinedBitArrays, ba) @@ -511,7 +502,7 @@ func runQuery(items *Items, query Query, operations GroupedOperations) (Items, i } var nextItems *Items - filteredItems, err := bitArrayFilter(items, operations, query) + filteredItems, err := bitArrayFilter(items, query) if err != nil { nextItems = items @@ -563,9 +554,6 @@ func filtered(items *Items, operations GroupedOperations, query Query) Items { filters := query.Filters anys := query.Anys - lock.RLock() - defer lock.RUnlock() - for _, item := range *items { if !any(item, anys, registerFuncs) { continue diff --git a/storage_operations.go b/storage_operations.go index c89fd86..a7accf3 100644 --- 
a/storage_operations.go +++ b/storage_operations.go @@ -227,15 +227,15 @@ func loadAtStart(storagename string, filename string, indexed bool) { ITEMS.FillIndexes() - /* - if indexed { - start = time.Now() - msg := fmt.Sprint("Creating index") - fmt.Printf(WarningColorN, msg) - makeIndex() - diff = time.Since(start) - msg = fmt.Sprint("Index set time: ", diff) - fmt.Printf(WarningColorN, msg) - } + /* should be added to FillIndexes + if indexed { + start = time.Now() + msg := fmt.Sprint("Creating index") + fmt.Printf(WarningColorN, msg) + makeIndex() + diff = time.Since(start) + msg = fmt.Sprint("Index set time: ", diff) + fmt.Printf(WarningColorN, msg) + } */ } diff --git a/storage_operations_test.go b/storage_operations_test.go new file mode 100644 index 0000000..da3f98f --- /dev/null +++ b/storage_operations_test.go @@ -0,0 +1,44 @@ +package main + +import ( + "testing" +) + +func TestBytesSaving(t *testing.T) { + + size := len(ITEMS) + + if size != 10 { + t.Errorf("expected 10 ITEMS got %d", size) + } + +} + +func TestBytes(t *testing.T) { + + saveAsBytes("testdata/testbytes") + ITEMS = Items{} + loadAsBytes("testdata/testbytes") + + if len(ITEMS) != 10 { + t.Error("bytes save / load failed") + } + + saveAsBytes("testdata/testbytesz") + ITEMS = Items{} + loadAsBytes("testdata/testbytesz") + if len(ITEMS) != 10 { + t.Error("bytes compressed save / load failed") + } + +} + +func TestJson(t *testing.T) { + + saveAsJsonZipped("testdata/test.json") + ITEMS = Items{} // Clear ITEMS + loadAsJsonZipped("testdata/test.json") + if len(ITEMS) != 10 { + t.Error("bytes compressed save / load failed") + } +} diff --git a/store.go b/store.go index 1f8fef5..8797f7b 100644 --- a/store.go +++ b/store.go @@ -3,6 +3,7 @@ package main import ( "fmt" "log" + "sync" "time" ) @@ -18,14 +19,17 @@ var ITEMS Items var itemChan ItemsChannel +// single item map lock when updating new items +var lock = sync.RWMutex{} + func init() { ITEMS = Items{} } func ItemChanWorker(itemChan 
ItemsChannel) { label := 0 - for items := range itemChan { + lock.Lock() for _, itm := range items { if itm != nil { smallItem := itm.Shrink(label) @@ -39,6 +43,7 @@ func ItemChanWorker(itemChan ItemsChannel) { label++ } } + lock.Unlock() } } @@ -46,8 +51,11 @@ func (items Items) FillIndexes() { start := time.Now() + lock.Lock() + defer lock.Unlock() + clearGeoIndex() - initBitarrays() + clearBitArrays() for i := range items { ITEMS[i].StoreBitArrayColumns() From 4a306f604e1d0b991e39089e63a12af7e8e30009 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 27 Apr 2021 01:25:53 +0200 Subject: [PATCH 48/54] wip: fix model creation after rewrite --- extras/create_model.py | 4 ++-- .../templates/loadRepeatColumn.template.jinja2 | 5 ----- extras/templates/model.template.jinja2 | 4 +--- extras/templates/repeatColumn.template.jinja2 | 5 ----- ...ataselectie_vbo_energie_20210217.head.csv.gz | Bin 708 -> 717 bytes 5 files changed, 3 insertions(+), 15 deletions(-) delete mode 100644 extras/templates/loadRepeatColumn.template.jinja2 delete mode 100644 extras/templates/repeatColumn.template.jinja2 diff --git a/extras/create_model.py b/extras/create_model.py index 623c1e2..0b68258 100644 --- a/extras/create_model.py +++ b/extras/create_model.py @@ -48,7 +48,7 @@ if os.path.isfile(config): with open(config, 'r') as stream: - cfg = yaml.load(stream)['model'] + cfg = yaml.load(stream, Loader=yaml.FullLoader)['model'] env = Environment( @@ -199,7 +199,7 @@ def gocamelCase(string): # create bitarrays with item labels for column values. 
bitArrayStores = [] for c1, c2 in zip(bitarray, bitarray_org): - onerow = f'\tSetBitArray("c2", i.{c1}, i.Label)\n' + onerow = f'\tSetBitArray("{c2}", i.{c1}, i.Label)\n' bitArrayStores.append(onerow) diff --git a/extras/templates/loadRepeatColumn.template.jinja2 b/extras/templates/loadRepeatColumn.template.jinja2 deleted file mode 100644 index d460706..0000000 --- a/extras/templates/loadRepeatColumn.template.jinja2 +++ /dev/null @@ -1,5 +0,0 @@ - - {{columnName}}Tracker = m.{{columnName}}Tracker - {{columnName}}IdxMap = m.{{columnName}}IdxMap - {{columnName}} = m.{{columnName}} - diff --git a/extras/templates/model.template.jinja2 b/extras/templates/model.template.jinja2 index e974cca..d312b64 100644 --- a/extras/templates/model.template.jinja2 +++ b/extras/templates/model.template.jinja2 @@ -42,9 +42,7 @@ type registerReduce map[string]func(Items) map[string]string type registerBitArray map[string]func(s string) (bitarray.BitArray, error) -type fieldIdxMap map[string]uint16 -type fieldMapIdx map[uint16]string -type fieldItemsMap map[uint16]bitarray.BitArray +type fieldBitarrayMap map[uint32]bitarray.BitArray type IndexMap[string]int64 diff --git a/extras/templates/repeatColumn.template.jinja2 b/extras/templates/repeatColumn.template.jinja2 deleted file mode 100644 index a8a02fd..0000000 --- a/extras/templates/repeatColumn.template.jinja2 +++ /dev/null @@ -1,5 +0,0 @@ - - {{columnName}}Tracker, - {{columnName}}IdxMap, - {{columnName}}, - diff --git a/testdata/dataselectie_vbo_energie_20210217.head.csv.gz b/testdata/dataselectie_vbo_energie_20210217.head.csv.gz index 1bd72087e3a82385e314b1bfe24c6490743ccbbb..2505c2b3f105ef8a08c52e6c090e91082a664387 100644 GIT binary patch delta 682 zcmV;b0#*IQ1q;(>(Itk$Sn**UE5|; znj-erHhAaP_G{-hrnEkSWb($g*Hc;iR`&}_v$SQq3XRP+Av9&E-FhnL_QHR87oEKT zeQhfX?(A;Uz3Gjqz+84+vyW#O@AR#^EOv4~+^KaFHqNiy-r2RipAT*byf!}1)~3y@ z@tHBcEjtJY{+;h5XhK(KF3%{YxL+)XrITgTxr&YKV*6!e#So1tvwp*CoLNQpF)Ty3 
zy)>p7$25z_E}PC3Z5}LePo94zm`O%3#)J}0QrYj+OSso?DtW5dPNH~tG{IVeL@*)8 zCkY~oBeFLa1PhX&GiT!h4$^vr;q&TU|2{gd+Gv~HROsa6um{H&orggTMQdFNQKQdq zuI73t-L^7K5s%)QHk!3X?>|w6&L9u+7pI}@moKg!^#1(y*@wk*ewcq^K^aXs5llib z3qcQ=QW|r`m5`K@10;u_Fd{tV7)g#bB3g6MzZb?waDUPH6=c+pTkt5&VUIZ;HNvCl zln$KM%hJRnQ`e@6ke@yx^`IDSL-2P5l!8}D=797FkiL;V>?r|KT7dsKATa{OxJGPD z==Apm#vcNkrMrZVTm*lJ^iGHf#qhpVBSeElF&pk8v=u0VlNLDC%@ zC72LgVW#=P&=GFJ!B~KPTZP@=0|}(@^a|sIGcv9))_Wh*5&s%T2K@!3dt}f+h~7?! QK4L;Y0g>NZ6#fhV02JU!!2kdN delta 673 zcmV;S0$%;i1;hmhABzYGixYp52Qhzh(=ZT*@BE64Pjp7K`m!V++XIvV+8+2gdX!iV zs%^>Gaxx6Ve`n<;#jX!2!vWelGl}+7}W?LMDjywkV#rrgPKxKo?fZ>(F{y|X)eKkx03c%xmBuXUFj z?Q*SMSM`t%{5uyjXno)0w#X@_I4%~$(#f*vT+K#yx&1n_f)83(c?@`mGq34Bho$eg zH(IyjoM!o$W!u}bE4%^j$%}skGrN4g}LN16_k=AB!;9gB0S?5362#aN^#JCl-dPwf8DzkRMgJ{c%9iY3XG;C+rg13OH(c}Q~YS? z2>0P&oPvJngx%u<1*Guw3gd(`GCpA}dLPqC|Bnj#3rK%ZK?4yXdYBM>!i0VS3**u< H{tN&B?~h6> From e6f1e4f8aa7f617d1c7a5ec1a57dfc52e4e31797 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 27 Apr 2021 14:02:51 +0200 Subject: [PATCH 49/54] done: code generation now works correctly, added new model and model_maps with more bitarray columns --- column.go | 11 + csv.go | 16 +- extras/config.yaml | 24 +- extras/create_model.py | 54 ++-- .../templates/bitarrayGetter.template.jinja2 | 19 -- extras/templates/itemFullColumn.jinja2 | 2 - extras/templates/model.template.jinja2 | 27 +- extras/templates/modelmap.template.jinja2 | 22 +- extras/templates/shrinkColumn.jinja2 | 12 - extras/templates/shrinkVars.jinja2 | 6 - .../templates/storebitarray.template.jinja2 | 9 - model.go | 299 +++++++++++++----- model_maps.go | 46 ++- 13 files changed, 331 insertions(+), 216 deletions(-) delete mode 100644 extras/templates/bitarrayGetter.template.jinja2 delete mode 100644 extras/templates/itemFullColumn.jinja2 delete mode 100644 extras/templates/shrinkColumn.jinja2 delete mode 100644 extras/templates/shrinkVars.jinja2 delete mode 100644 extras/templates/storebitarray.template.jinja2 diff --git 
a/column.go b/column.go index 722dea1..87a0add 100644 --- a/column.go +++ b/column.go @@ -4,6 +4,7 @@ import ( "errors" "github.com/Workiva/go-datastructures/bitarray" "log" + "strings" ) type fieldIdxMap map[string]uint32 @@ -72,6 +73,16 @@ func (m *MappedColumn) GetValue(idx uint32) string { return m.Field[idx] } +func (m *MappedColumn) GetArrayValue(idxs []uint32) string { + + result := make([]string, 0) + for _, v := range idxs { + vs := m.GetValue(v) + result = append(result, vs) + } + return strings.Join(result, ", ") +} + func (m *MappedColumn) GetIndex(s string) uint32 { return m.Idx[s] } diff --git a/csv.go b/csv.go index a4f49f8..12674f0 100644 --- a/csv.go +++ b/csv.go @@ -151,14 +151,20 @@ func importCSV(filename string, itemChan ItemsChannel, defer file.Close() bar = NewProgressBar(file) - fz, err := pgzip.NewReader(io.TeeReader(file, bar)) - if err != nil { - return err + if strings.HasSuffix(filename, ".gz") { + fz, err := pgzip.NewReader(io.TeeReader(file, bar)) + + if err != nil { + return err + } + defer fz.Close() + reader = csv.NewDialectReader(fz, dialect) + } else { + fz := io.TeeReader(file, bar) + reader = csv.NewDialectReader(fz, dialect) } - defer fz.Close() - reader = csv.NewDialectReader(fz, dialect) } else { reader = csv.NewDialectReader(os.Stdin, dialect) } diff --git a/extras/config.yaml b/extras/config.yaml index f970d83..5acd74b 100644 --- a/extras/config.yaml +++ b/extras/config.yaml @@ -10,21 +10,21 @@ model: labelscore_definitief: b labelscore_voorlopig: b numid: u - oppervlakte: u - p6_gas_aansluitingen_2020: u - p6_gasm3_2020: u - p6_kwh_2020: u - p6_totaal_pandoppervlak_m2: u - pand_bouwjaar: u - pand_gas_aansluitingen: u - pand_gas_ean_aansluitingen: u - pid: u + oppervlakte: r + p6_gas_aansluitingen_2020: r + p6_gasm3_2020: r + p6_kwh_2020: r + p6_totaal_pandoppervlak_m2: r + pand_bouwjaar: r + pand_gas_aansluitingen: r + pand_gas_ean_aansluitingen: r + pid: b point: g - postcode: u + postcode: r provinciecode: b 
provincienaam: r - vid: u + vid: r wijkcode: b wijknaam: r woning_type: b - woningequivalent: u + woningequivalent: r diff --git a/extras/create_model.py b/extras/create_model.py index 0b68258..891a915 100644 --- a/extras/create_model.py +++ b/extras/create_model.py @@ -9,7 +9,7 @@ - Repeated option to store repeated values in a map and each individual items - only stores uint16 reference to map key. + only stores uint32 reference to map key. - BitArray option which is like Repeated value but also creates a map[key]bitmap for all @@ -178,23 +178,21 @@ def gocamelCase(string): initRepeatColumns = [] repeatColumnNames = [] loadRepeatColumnNames = [] +mappedColumns = [] -for columnName in repeated: - initRow = f"\t {columnName} = NewReapeatedColumn()\n" +for columnName, c2 in zip(repeated, repeated_org): + initRow = f'\t {columnName} = NewReapeatedColumn("{c2}")\n' initRepeatColumns.append(initRow) - repeatRow = f"\t {columnName} \n" + repeatRow = f"\t {columnName}, \n" repeatColumnNames.append(repeatRow) loadRow = f"\t {columnName} = m.{columnName} \n" loadRepeatColumnNames.append(loadRow) + mappedColumnsRow = f"\t {columnName} MappedColumn \n" + mappedColumns.append(mappedColumnsRow) -# setup initial data structs for each bitarray column -initBitarrays = [] -for columnName in bitarray: - onerow = f"\t {columnName}Items = make(fieldItemsMap)\n" - initBitarrays.append(onerow) # create bitarrays with item labels for column values. 
bitArrayStores = [] @@ -205,20 +203,19 @@ def gocamelCase(string): # create ItemFull struct fields columnsItemIn = [] -jsonColumn = env.get_template('itemFullColumn.jinja2') + for c1, c2 in zip(allcolumns, allcolumns_org): - onerow = jsonColumn.render(c1=c1, c2=c2) + onerow = f'\t {c1} string `json:"{c2}"`\n' columnsItemIn.append(onerow) # create ItemFull struct fields columnsItemOut = [] -jsonColumn = env.get_template('itemFullColumn.jinja2') for c1, c2 in zip(allcolumns, allcolumns_org): if c1 in ignored: continue - onerow = jsonColumn.render(c1=c1, c2=c2) + onerow = f'\t {c1} string `json:"{c2}"`\n' columnsItemOut.append(onerow) # create Item struct fields @@ -230,20 +227,18 @@ def gocamelCase(string): onerow = f"\t{c1} string\n" if c1 in repeated: - onerow = f"\t{c1} uint16\n" + onerow = f"\t{c1} uint32\n" columnsItem.append(onerow) # create Shrink code for repeated fields -# where we map uint16 to a string value. +# where we map uint32 to a string value. shrinkVars = [] shrinkItems = [] -shrinkvartemplate = env.get_template('shrinkVars.jinja2') -shrinktemplate = env.get_template('shrinkColumn.jinja2') for c in repeated: - shrinkVars.append( - shrinkvartemplate.render(column=c, bitarray=c in bitarray)) + mappedcolumn = f"var {c} MappedColumn\n" + shrinkVars.append(mappedcolumn) shrinkItems.append(f"\t {c}.Store(i.{c})\n") @@ -291,7 +286,7 @@ def gocamelCase(string): lookup = f"i.{c}" if c in repeated: - lookup = f"{c}[i.{c}]" + lookup = f"{c}.GetValue(i.{c})" txt = filtertemplate.render(column=c, lookup=lookup) columnFilters.append(txt) @@ -317,8 +312,8 @@ def gocamelCase(string): c2 = f"items[i].{c} > items[j].{c}" if c in repeated: - c1 = f"{c}[items[i].{c}] < {c}[items[j].{c}]" - c2 = f"{c}[items[i].{c}] > {c}[items[j].{c}]" + c1 = f"{c}.GetValue(items[i].{c}) < {c}.GetValue(items[j].{c})" + c2 = f"{c}.GetValue(items[i].{c}) > {c}.GetValue(items[j].{c})" txt = sortTemplate.render(co=co, c1=c1, c2=c2) sortColumns.append(txt) @@ -339,11 +334,9 @@ def 
gocamelCase(string): geometryGetter = f"Getters{geocolumns[0]}(&i)" output = modeltemplate.render( - #initRepeatColumns=''.join(initRepeatColumns), columnsItemIn=''.join(columnsItemIn), columnsItemOut=''.join(columnsItemOut), columnsItem=''.join(columnsItem), - # shrinkVars=''.join(shrinkVars), shrinkItems=''.join(shrinkItems), shrinkItemFields=''.join(shrinkItemFields), expandItemFields=''.join(expandItemFields), @@ -367,16 +360,17 @@ def gocamelCase(string): mapsoutput = mapstemplate.render( initRepeatColumns=''.join(initRepeatColumns), - repeatColumnNames = ''.join(repeatColumnNames), - loadRepeatColumnNames = ''.join(loadRepeatColumnNames), - initBitarrays=''.join(initBitarrays), + repeatColumnNames=''.join(repeatColumnNames), + loadRepeatColumnNames=''.join(loadRepeatColumnNames), + mappedColumns=''.join(mappedColumns), shrinkVars=''.join(shrinkVars), - ) -f = open('modelmaps.go', 'w') +f = open('model_maps.go', 'w') f.write(mapsoutput) f.close() -print('model hashmaps saved in modelmaps.go') +print('model hashmaps saved in model_maps.go') +os.system("go fmt model.go") +os.system("go fmt model_maps.go") diff --git a/extras/templates/bitarrayGetter.template.jinja2 b/extras/templates/bitarrayGetter.template.jinja2 deleted file mode 100644 index f04b600..0000000 --- a/extras/templates/bitarrayGetter.template.jinja2 +++ /dev/null @@ -1,19 +0,0 @@ -// GetBitArray{{columnName}} for given v string see if there is -// a bitarray created. 
-func GetBitArray{{columnName}}(v string) (bitarray.BitArray, error) { - - bpi, ok := {{columnName}}IdxMap[v] - - if !ok { - return nil, errors.New("no bitarray filter found for column value {{columnName}}") - } - - ba, ok := {{columnName}}Items[bpi] - - if !ok { - return nil, errors.New("no bitarray filter found for column idx value {{columnName}}") - } - - return ba, nil -} - diff --git a/extras/templates/itemFullColumn.jinja2 b/extras/templates/itemFullColumn.jinja2 deleted file mode 100644 index 2113443..0000000 --- a/extras/templates/itemFullColumn.jinja2 +++ /dev/null @@ -1,2 +0,0 @@ - {{c1}} string `json:"{{c2}}"` - diff --git a/extras/templates/model.template.jinja2 b/extras/templates/model.template.jinja2 index d312b64..7beb896 100644 --- a/extras/templates/model.template.jinja2 +++ b/extras/templates/model.template.jinja2 @@ -26,11 +26,10 @@ package main import ( - "log" + "encoding/json" "sort" "strconv" "strings" - "sync" "errors" "github.com/Workiva/go-datastructures/bitarray" @@ -41,12 +40,8 @@ type registerGettersMap map[string]func(*Item) string type registerReduce map[string]func(Items) map[string]string type registerBitArray map[string]func(s string) (bitarray.BitArray, error) - type fieldBitarrayMap map[uint32]bitarray.BitArray -type IndexMap[string]int64 - - {{itemStructs}} @@ -74,12 +69,9 @@ func (i Item) MarshalJSON() ([]byte, error) { return json.Marshal(i.Serialize()) } -// Shrink create smaller Item using uint16 +// Shrink create smaller Item using uint32 func (i ItemIn) Shrink(label int) Item { - // Hashmap lookup for index - IndexMap[i.{{indexcolumn}}] = label - {{shrinkItems}} return Item{ @@ -92,7 +84,7 @@ func (i ItemIn) Shrink(label int) Item { } // Store selected columns in seperate map[columnvalue]bitarray -// for gast item lookup +// for fast item selection func (i Item) StoreBitArrayColumns() { {{ bitArrayStores }} } @@ -244,25 +236,20 @@ type sortLookup map[string]func(int, int) bool func createSort(items Items) sortLookup { 
sortFuncs := sortLookup{ - {{sortColumns}} - - /* - "ekey": func(i, j int) bool { return items[i].Ekey < items[j].Ekey }, - "-ekey": func(i, j int) bool { return items[i].Ekey > items[j].Ekey }, - */ } return sortFuncs } func sortBy(items Items, sortingL []string) (Items, []string) { - sortFuncs := createSort(items) for _, sortFuncName := range sortingL { - sortFunc := sortFuncs[sortFuncName] - sort.Slice(items, sortFunc) + sortFunc, ok := sortFuncs[sortFuncName] + if ok { + sort.Slice(items, sortFunc) + } } // TODO must be nicer way diff --git a/extras/templates/modelmap.template.jinja2 b/extras/templates/modelmap.template.jinja2 index acb1bbc..c5bc4f5 100644 --- a/extras/templates/modelmap.template.jinja2 +++ b/extras/templates/modelmap.template.jinja2 @@ -9,43 +9,39 @@ package main type ModelMaps struct { - {{MappedColumns}} +{{mappedColumns}} } +var BitArrays map[string]fieldBitarrayMap {{shrinkVars}} -func initBitarrays() { - -{{initBitarrays}} - +func clearBitArrays() { + BitArrays = make(map[string]fieldBitarrayMap) } func init() { + clearBitArrays() setUpRepeatedColumns() } func setUpRepeatedColumns() { - initBitarrays() - - {{initRepeatColumns}} - +{{initRepeatColumns}} } func CreateMapstore() ModelMaps { return ModelMaps{ - - {{repeatColumnNames}} - +{{repeatColumnNames}} } } func LoadMapstore(m ModelMaps) { - {{loadRepeatColumnNames}} +{{loadRepeatColumnNames}} + } diff --git a/extras/templates/shrinkColumn.jinja2 b/extras/templates/shrinkColumn.jinja2 deleted file mode 100644 index a63bcb0..0000000 --- a/extras/templates/shrinkColumn.jinja2 +++ /dev/null @@ -1,12 +0,0 @@ - - //check if column value is already present - //else store new key - if _, ok := {{column}}IdxMap[i.{{column}}]; !ok { - // store {{column}} in map at current index of tracker - {{column}}[{{column}}Tracker] = i.{{column}} - // store key - idx - {{column}}IdxMap[i.{{column}}] = {{column}}Tracker - // increase tracker - {{column}}Tracker += 1 - } - diff --git 
a/extras/templates/shrinkVars.jinja2 b/extras/templates/shrinkVars.jinja2 deleted file mode 100644 index 9dacdfc..0000000 --- a/extras/templates/shrinkVars.jinja2 +++ /dev/null @@ -1,6 +0,0 @@ - -var {{column}} MappedColumn -{% if bitarray %} -var {{column}}Items fieldBitarrayMap -{% endif %} - diff --git a/extras/templates/storebitarray.template.jinja2 b/extras/templates/storebitarray.template.jinja2 deleted file mode 100644 index f9494b1..0000000 --- a/extras/templates/storebitarray.template.jinja2 +++ /dev/null @@ -1,9 +0,0 @@ - - // Column {{columnName}} has byte arrays for - ba, ok = {{columnName}}Items[i.{{columnName}}] - if !ok { - ba = bitarray.NewSparseBitArray() - {{columnName}}Items[i.{{columnName}}] = ba - } - - ba.SetBit(uint64(i.Label)) diff --git a/model.go b/model.go index 9bbebe0..68dc2c7 100644 --- a/model.go +++ b/model.go @@ -42,10 +42,6 @@ type registerReduce map[string]func(Items) map[string]string type registerBitArray map[string]func(s string) (bitarray.BitArray, error) type fieldBitarrayMap map[uint32]bitarray.BitArray -func init() { - setUpRepeatedColumns() -} - type ItemIn struct { Pid string `json:"pid"` Vid string `json:"vid"` @@ -110,12 +106,12 @@ type ItemOut struct { type Item struct { Label int // internal index in ITEMS - Pid string - Vid string + Pid uint32 + Vid uint32 Numid string - Postcode string - Oppervlakte string - Woningequivalent string + Postcode uint32 + Oppervlakte uint32 + Woningequivalent uint32 Adres string WoningType uint32 LabelscoreVoorlopig uint32 @@ -134,7 +130,7 @@ type Item struct { P6GasAansluitingen2020 uint32 P6Gasm32020 uint32 P6Kwh2020 uint32 - P6TotaalPandoppervlakM2 string + P6TotaalPandoppervlakM2 uint32 PandBouwjaar uint32 PandGasAansluitingen uint32 Gebruiksdoelen []uint32 @@ -144,9 +140,14 @@ func (i Item) MarshalJSON() ([]byte, error) { return json.Marshal(i.Serialize()) } -// Shrink create smaller Item using uint16 +// Shrink create smaller Item using uint32 func (i ItemIn) Shrink(label int) 
Item { + Pid.Store(i.Pid) + Vid.Store(i.Vid) + Postcode.Store(i.Postcode) + Oppervlakte.Store(i.Oppervlakte) + Woningequivalent.Store(i.Woningequivalent) WoningType.Store(i.WoningType) LabelscoreVoorlopig.Store(i.LabelscoreVoorlopig) LabelscoreDefinitief.Store(i.LabelscoreDefinitief) @@ -158,10 +159,11 @@ func (i ItemIn) Shrink(label int) Item { Wijknaam.Store(i.Wijknaam) Provinciecode.Store(i.Provinciecode) Provincienaam.Store(i.Provincienaam) - PandGasAansluitingen.Store(i.PandGasEanAansluitingen) + PandGasEanAansluitingen.Store(i.PandGasEanAansluitingen) P6GasAansluitingen2020.Store(i.P6GasAansluitingen2020) P6Gasm32020.Store(i.P6Gasm32020) P6Kwh2020.Store(i.P6Kwh2020) + P6TotaalPandoppervlakM2.Store(i.P6TotaalPandoppervlakM2) PandBouwjaar.Store(i.PandBouwjaar) PandGasAansluitingen.Store(i.PandGasAansluitingen) @@ -171,12 +173,12 @@ func (i ItemIn) Shrink(label int) Item { label, - i.Pid, - i.Vid, + Pid.GetIndex(i.Pid), + Vid.GetIndex(i.Vid), i.Numid, - i.Postcode, - i.Oppervlakte, - i.Woningequivalent, + Postcode.GetIndex(i.Postcode), + Oppervlakte.GetIndex(i.Oppervlakte), + Woningequivalent.GetIndex(i.Woningequivalent), i.Adres, WoningType.GetIndex(i.WoningType), LabelscoreVoorlopig.GetIndex(i.LabelscoreVoorlopig), @@ -195,7 +197,7 @@ func (i ItemIn) Shrink(label int) Item { P6GasAansluitingen2020.GetIndex(i.P6GasAansluitingen2020), P6Gasm32020.GetIndex(i.P6Gasm32020), P6Kwh2020.GetIndex(i.P6Kwh2020), - i.P6TotaalPandoppervlakM2, + P6TotaalPandoppervlakM2.GetIndex(i.P6TotaalPandoppervlakM2), PandBouwjaar.GetIndex(i.PandBouwjaar), PandGasAansluitingen.GetIndex(i.PandGasAansluitingen), doelen, @@ -204,23 +206,27 @@ func (i ItemIn) Shrink(label int) Item { // Store selected columns in seperate map[columnvalue]bitarray // for fast item selection -// BitArrays cannot be serialized func (i Item) StoreBitArrayColumns() { - + SetBitArray("pid", i.Pid, i.Label) SetBitArray("woning_type", i.WoningType, i.Label) SetBitArray("labelscore_voorlopig", i.LabelscoreVoorlopig, 
i.Label) + SetBitArray("labelscore_definitief", i.LabelscoreDefinitief, i.Label) + SetBitArray("gemeentecode", i.Gemeentecode, i.Label) SetBitArray("buurtcode", i.Buurtcode, i.Label) + SetBitArray("wijkcode", i.Wijkcode, i.Label) + SetBitArray("provinciecode", i.Provinciecode, i.Label) + } func (i Item) Serialize() ItemOut { - return ItemOut{ - i.Pid, - i.Vid, + + Pid.GetValue(i.Pid), + Vid.GetValue(i.Vid), i.Numid, - i.Postcode, - i.Oppervlakte, - i.Woningequivalent, + Postcode.GetValue(i.Postcode), + Oppervlakte.GetValue(i.Oppervlakte), + Woningequivalent.GetValue(i.Woningequivalent), i.Adres, WoningType.GetValue(i.WoningType), LabelscoreVoorlopig.GetValue(i.LabelscoreVoorlopig), @@ -239,10 +245,10 @@ func (i Item) Serialize() ItemOut { P6GasAansluitingen2020.GetValue(i.P6GasAansluitingen2020), P6Gasm32020.GetValue(i.P6Gasm32020), P6Kwh2020.GetValue(i.P6Kwh2020), - i.P6TotaalPandoppervlakM2, + P6TotaalPandoppervlakM2.GetValue(i.P6TotaalPandoppervlakM2), PandBouwjaar.GetValue(i.PandBouwjaar), PandGasAansluitingen.GetValue(i.PandGasAansluitingen), - GettersGebruiksdoelen(&i), + Gebruiksdoelen.GetArrayValue(i.Gebruiksdoelen), } } @@ -318,12 +324,12 @@ func (i Item) Row() []string { return []string{ - i.Pid, - i.Vid, + Pid.GetValue(i.Pid), + Vid.GetValue(i.Vid), i.Numid, - i.Postcode, - i.Oppervlakte, - i.Woningequivalent, + Postcode.GetValue(i.Postcode), + Oppervlakte.GetValue(i.Oppervlakte), + Woningequivalent.GetValue(i.Woningequivalent), i.Adres, WoningType.GetValue(i.WoningType), LabelscoreVoorlopig.GetValue(i.LabelscoreVoorlopig), @@ -342,15 +348,15 @@ func (i Item) Row() []string { P6GasAansluitingen2020.GetValue(i.P6GasAansluitingen2020), P6Gasm32020.GetValue(i.P6Gasm32020), P6Kwh2020.GetValue(i.P6Kwh2020), - i.P6TotaalPandoppervlakM2, + P6TotaalPandoppervlakM2.GetValue(i.P6TotaalPandoppervlakM2), PandBouwjaar.GetValue(i.PandBouwjaar), PandGasAansluitingen.GetValue(i.PandGasAansluitingen), - GettersGebruiksdoelen(&i), + 
Gebruiksdoelen.GetArrayValue(i.Gebruiksdoelen), } } func (i Item) GetIndex() string { - return GettersAdres(&i) + return GettersPid(&i) } func (i Item) GetGeometry() string { @@ -359,42 +365,42 @@ func (i Item) GetGeometry() string { // contain filter Pid func FilterPidContains(i *Item, s string) bool { - return strings.Contains(i.Pid, s) + return strings.Contains(Pid.GetValue(i.Pid), s) } // startswith filter Pid func FilterPidStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Pid, s) + return strings.HasPrefix(Pid.GetValue(i.Pid), s) } // match filters Pid func FilterPidMatch(i *Item, s string) bool { - return i.Pid == s + return Pid.GetValue(i.Pid) == s } // getter Pid func GettersPid(i *Item) string { - return i.Pid + return Pid.GetValue(i.Pid) } // contain filter Vid func FilterVidContains(i *Item, s string) bool { - return strings.Contains(i.Vid, s) + return strings.Contains(Vid.GetValue(i.Vid), s) } // startswith filter Vid func FilterVidStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Vid, s) + return strings.HasPrefix(Vid.GetValue(i.Vid), s) } // match filters Vid func FilterVidMatch(i *Item, s string) bool { - return i.Vid == s + return Vid.GetValue(i.Vid) == s } // getter Vid func GettersVid(i *Item) string { - return i.Vid + return Vid.GetValue(i.Vid) } // contain filter Numid @@ -419,62 +425,62 @@ func GettersNumid(i *Item) string { // contain filter Postcode func FilterPostcodeContains(i *Item, s string) bool { - return strings.Contains(i.Postcode, s) + return strings.Contains(Postcode.GetValue(i.Postcode), s) } // startswith filter Postcode func FilterPostcodeStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Postcode, s) + return strings.HasPrefix(Postcode.GetValue(i.Postcode), s) } // match filters Postcode func FilterPostcodeMatch(i *Item, s string) bool { - return i.Postcode == s + return Postcode.GetValue(i.Postcode) == s } // getter Postcode func GettersPostcode(i *Item) string { - return 
i.Postcode + return Postcode.GetValue(i.Postcode) } // contain filter Oppervlakte func FilterOppervlakteContains(i *Item, s string) bool { - return strings.Contains(i.Oppervlakte, s) + return strings.Contains(Oppervlakte.GetValue(i.Oppervlakte), s) } // startswith filter Oppervlakte func FilterOppervlakteStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Oppervlakte, s) + return strings.HasPrefix(Oppervlakte.GetValue(i.Oppervlakte), s) } // match filters Oppervlakte func FilterOppervlakteMatch(i *Item, s string) bool { - return i.Oppervlakte == s + return Oppervlakte.GetValue(i.Oppervlakte) == s } // getter Oppervlakte func GettersOppervlakte(i *Item) string { - return i.Oppervlakte + return Oppervlakte.GetValue(i.Oppervlakte) } // contain filter Woningequivalent func FilterWoningequivalentContains(i *Item, s string) bool { - return strings.Contains(i.Woningequivalent, s) + return strings.Contains(Woningequivalent.GetValue(i.Woningequivalent), s) } // startswith filter Woningequivalent func FilterWoningequivalentStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Woningequivalent, s) + return strings.HasPrefix(Woningequivalent.GetValue(i.Woningequivalent), s) } // match filters Woningequivalent func FilterWoningequivalentMatch(i *Item, s string) bool { - return i.Woningequivalent == s + return Woningequivalent.GetValue(i.Woningequivalent) == s } // getter Woningequivalent func GettersWoningequivalent(i *Item) string { - return i.Woningequivalent + return Woningequivalent.GetValue(i.Woningequivalent) } // contain filter Adres @@ -649,7 +655,7 @@ func FilterWijkcodeStartsWith(i *Item, s string) bool { // match filters Wijkcode func FilterWijkcodeMatch(i *Item, s string) bool { - return i.Wijkcode == Wijkcode.GetIndex(s) + return Wijkcode.GetValue(i.Wijkcode) == s } // getter Wijkcode @@ -669,7 +675,7 @@ func FilterWijknaamStartsWith(i *Item, s string) bool { // match filters Wijknaam func FilterWijknaamMatch(i *Item, s string) bool { - 
return Wijknaam.GetIndex(s) == i.Wijknaam + return Wijknaam.GetValue(i.Wijknaam) == s } // getter Wijknaam @@ -839,22 +845,22 @@ func GettersP6Kwh2020(i *Item) string { // contain filter P6TotaalPandoppervlakM2 func FilterP6TotaalPandoppervlakM2Contains(i *Item, s string) bool { - return strings.Contains(i.P6TotaalPandoppervlakM2, s) + return strings.Contains(P6TotaalPandoppervlakM2.GetValue(i.P6TotaalPandoppervlakM2), s) } // startswith filter P6TotaalPandoppervlakM2 func FilterP6TotaalPandoppervlakM2StartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.P6TotaalPandoppervlakM2, s) + return strings.HasPrefix(P6TotaalPandoppervlakM2.GetValue(i.P6TotaalPandoppervlakM2), s) } // match filters P6TotaalPandoppervlakM2 func FilterP6TotaalPandoppervlakM2Match(i *Item, s string) bool { - return i.P6TotaalPandoppervlakM2 == s + return P6TotaalPandoppervlakM2.GetValue(i.P6TotaalPandoppervlakM2) == s } // getter P6TotaalPandoppervlakM2 func GettersP6TotaalPandoppervlakM2(i *Item) string { - return i.P6TotaalPandoppervlakM2 + return P6TotaalPandoppervlakM2.GetValue(i.P6TotaalPandoppervlakM2) } // contain filter PandBouwjaar @@ -933,12 +939,7 @@ func FilterGebruiksdoelenMatch(i *Item, s string) bool { // getter Gebruiksdoelen func GettersGebruiksdoelen(i *Item) string { - doelen := make([]string, 0) - for _, v := range i.Gebruiksdoelen { - vs := Gebruiksdoelen.GetValue(v) - doelen = append(doelen, vs) - } - return strings.Join(doelen, ", ") + return Gebruiksdoelen.GetArrayValue(i.Gebruiksdoelen) } // getter Gebruiksdoelen @@ -1017,7 +1018,6 @@ func init() { RegisterGroupBy = make(registerGroupByFunc) RegisterGetters = make(registerGettersMap) RegisterReduce = make(registerReduce) - RegisterBitArray = make(registerBitArray) // register search filter. 
//RegisterFuncMap["search"] = 'EDITYOURSELF' @@ -1249,23 +1249,35 @@ func createSort(items Items) sortLookup { sortFuncs := sortLookup{ - "pid": func(i, j int) bool { return items[i].Pid < items[j].Pid }, - "-pid": func(i, j int) bool { return items[i].Pid > items[j].Pid }, + "pid": func(i, j int) bool { return Pid.GetValue(items[i].Pid) < Pid.GetValue(items[j].Pid) }, + "-pid": func(i, j int) bool { return Pid.GetValue(items[i].Pid) > Pid.GetValue(items[j].Pid) }, - "vid": func(i, j int) bool { return items[i].Vid < items[j].Vid }, - "-vid": func(i, j int) bool { return items[i].Vid > items[j].Vid }, + "vid": func(i, j int) bool { return Vid.GetValue(items[i].Vid) < Vid.GetValue(items[j].Vid) }, + "-vid": func(i, j int) bool { return Vid.GetValue(items[i].Vid) > Vid.GetValue(items[j].Vid) }, "numid": func(i, j int) bool { return items[i].Numid < items[j].Numid }, "-numid": func(i, j int) bool { return items[i].Numid > items[j].Numid }, - "postcode": func(i, j int) bool { return items[i].Postcode < items[j].Postcode }, - "-postcode": func(i, j int) bool { return items[i].Postcode > items[j].Postcode }, + "postcode": func(i, j int) bool { + return Postcode.GetValue(items[i].Postcode) < Postcode.GetValue(items[j].Postcode) + }, + "-postcode": func(i, j int) bool { + return Postcode.GetValue(items[i].Postcode) > Postcode.GetValue(items[j].Postcode) + }, - "oppervlakte": func(i, j int) bool { return items[i].Oppervlakte < items[j].Oppervlakte }, - "-oppervlakte": func(i, j int) bool { return items[i].Oppervlakte > items[j].Oppervlakte }, + "oppervlakte": func(i, j int) bool { + return Oppervlakte.GetValue(items[i].Oppervlakte) < Oppervlakte.GetValue(items[j].Oppervlakte) + }, + "-oppervlakte": func(i, j int) bool { + return Oppervlakte.GetValue(items[i].Oppervlakte) > Oppervlakte.GetValue(items[j].Oppervlakte) + }, - "woningequivalent": func(i, j int) bool { return items[i].Woningequivalent < items[j].Woningequivalent }, - "-woningequivalent": func(i, j int) bool { 
return items[i].Woningequivalent > items[j].Woningequivalent }, + "woningequivalent": func(i, j int) bool { + return Woningequivalent.GetValue(items[i].Woningequivalent) < Woningequivalent.GetValue(items[j].Woningequivalent) + }, + "-woningequivalent": func(i, j int) bool { + return Woningequivalent.GetValue(items[i].Woningequivalent) > Woningequivalent.GetValue(items[j].Woningequivalent) + }, "adres": func(i, j int) bool { return items[i].Adres < items[j].Adres }, "-adres": func(i, j int) bool { return items[i].Adres > items[j].Adres }, @@ -1277,8 +1289,137 @@ func createSort(items Items) sortLookup { return WoningType.GetValue(items[i].WoningType) > WoningType.GetValue(items[j].WoningType) }, + "labelscore_voorlopig": func(i, j int) bool { + return LabelscoreVoorlopig.GetValue(items[i].LabelscoreVoorlopig) < LabelscoreVoorlopig.GetValue(items[j].LabelscoreVoorlopig) + }, + "-labelscore_voorlopig": func(i, j int) bool { + return LabelscoreVoorlopig.GetValue(items[i].LabelscoreVoorlopig) > LabelscoreVoorlopig.GetValue(items[j].LabelscoreVoorlopig) + }, + + "labelscore_definitief": func(i, j int) bool { + return LabelscoreDefinitief.GetValue(items[i].LabelscoreDefinitief) < LabelscoreDefinitief.GetValue(items[j].LabelscoreDefinitief) + }, + "-labelscore_definitief": func(i, j int) bool { + return LabelscoreDefinitief.GetValue(items[i].LabelscoreDefinitief) > LabelscoreDefinitief.GetValue(items[j].LabelscoreDefinitief) + }, + + "gemeentecode": func(i, j int) bool { + return Gemeentecode.GetValue(items[i].Gemeentecode) < Gemeentecode.GetValue(items[j].Gemeentecode) + }, + "-gemeentecode": func(i, j int) bool { + return Gemeentecode.GetValue(items[i].Gemeentecode) > Gemeentecode.GetValue(items[j].Gemeentecode) + }, + + "gemeentenaam": func(i, j int) bool { + return Gemeentenaam.GetValue(items[i].Gemeentenaam) < Gemeentenaam.GetValue(items[j].Gemeentenaam) + }, + "-gemeentenaam": func(i, j int) bool { + return Gemeentenaam.GetValue(items[i].Gemeentenaam) > 
Gemeentenaam.GetValue(items[j].Gemeentenaam) + }, + + "buurtcode": func(i, j int) bool { + return Buurtcode.GetValue(items[i].Buurtcode) < Buurtcode.GetValue(items[j].Buurtcode) + }, + "-buurtcode": func(i, j int) bool { + return Buurtcode.GetValue(items[i].Buurtcode) > Buurtcode.GetValue(items[j].Buurtcode) + }, + + "buurtnaam": func(i, j int) bool { + return Buurtnaam.GetValue(items[i].Buurtnaam) < Buurtnaam.GetValue(items[j].Buurtnaam) + }, + "-buurtnaam": func(i, j int) bool { + return Buurtnaam.GetValue(items[i].Buurtnaam) > Buurtnaam.GetValue(items[j].Buurtnaam) + }, + + "wijkcode": func(i, j int) bool { + return Wijkcode.GetValue(items[i].Wijkcode) < Wijkcode.GetValue(items[j].Wijkcode) + }, + "-wijkcode": func(i, j int) bool { + return Wijkcode.GetValue(items[i].Wijkcode) > Wijkcode.GetValue(items[j].Wijkcode) + }, + + "wijknaam": func(i, j int) bool { + return Wijknaam.GetValue(items[i].Wijknaam) < Wijknaam.GetValue(items[j].Wijknaam) + }, + "-wijknaam": func(i, j int) bool { + return Wijknaam.GetValue(items[i].Wijknaam) > Wijknaam.GetValue(items[j].Wijknaam) + }, + + "provinciecode": func(i, j int) bool { + return Provinciecode.GetValue(items[i].Provinciecode) < Provinciecode.GetValue(items[j].Provinciecode) + }, + "-provinciecode": func(i, j int) bool { + return Provinciecode.GetValue(items[i].Provinciecode) > Provinciecode.GetValue(items[j].Provinciecode) + }, + + "provincienaam": func(i, j int) bool { + return Provincienaam.GetValue(items[i].Provincienaam) < Provincienaam.GetValue(items[j].Provincienaam) + }, + "-provincienaam": func(i, j int) bool { + return Provincienaam.GetValue(items[i].Provincienaam) > Provincienaam.GetValue(items[j].Provincienaam) + }, + "point": func(i, j int) bool { return items[i].Point < items[j].Point }, "-point": func(i, j int) bool { return items[i].Point > items[j].Point }, + + "pand_gas_ean_aansluitingen": func(i, j int) bool { + return PandGasEanAansluitingen.GetValue(items[i].PandGasEanAansluitingen) < 
PandGasEanAansluitingen.GetValue(items[j].PandGasEanAansluitingen) + }, + "-pand_gas_ean_aansluitingen": func(i, j int) bool { + return PandGasEanAansluitingen.GetValue(items[i].PandGasEanAansluitingen) > PandGasEanAansluitingen.GetValue(items[j].PandGasEanAansluitingen) + }, + + "group_id_2020": func(i, j int) bool { return items[i].GroupId2020 < items[j].GroupId2020 }, + "-group_id_2020": func(i, j int) bool { return items[i].GroupId2020 > items[j].GroupId2020 }, + + "p6_gas_aansluitingen_2020": func(i, j int) bool { + return P6GasAansluitingen2020.GetValue(items[i].P6GasAansluitingen2020) < P6GasAansluitingen2020.GetValue(items[j].P6GasAansluitingen2020) + }, + "-p6_gas_aansluitingen_2020": func(i, j int) bool { + return P6GasAansluitingen2020.GetValue(items[i].P6GasAansluitingen2020) > P6GasAansluitingen2020.GetValue(items[j].P6GasAansluitingen2020) + }, + + "p6_gasm3_2020": func(i, j int) bool { + return P6Gasm32020.GetValue(items[i].P6Gasm32020) < P6Gasm32020.GetValue(items[j].P6Gasm32020) + }, + "-p6_gasm3_2020": func(i, j int) bool { + return P6Gasm32020.GetValue(items[i].P6Gasm32020) > P6Gasm32020.GetValue(items[j].P6Gasm32020) + }, + + "p6_kwh_2020": func(i, j int) bool { + return P6Kwh2020.GetValue(items[i].P6Kwh2020) < P6Kwh2020.GetValue(items[j].P6Kwh2020) + }, + "-p6_kwh_2020": func(i, j int) bool { + return P6Kwh2020.GetValue(items[i].P6Kwh2020) > P6Kwh2020.GetValue(items[j].P6Kwh2020) + }, + + "p6_totaal_pandoppervlak_m2": func(i, j int) bool { + return P6TotaalPandoppervlakM2.GetValue(items[i].P6TotaalPandoppervlakM2) < P6TotaalPandoppervlakM2.GetValue(items[j].P6TotaalPandoppervlakM2) + }, + "-p6_totaal_pandoppervlak_m2": func(i, j int) bool { + return P6TotaalPandoppervlakM2.GetValue(items[i].P6TotaalPandoppervlakM2) > P6TotaalPandoppervlakM2.GetValue(items[j].P6TotaalPandoppervlakM2) + }, + + "pand_bouwjaar": func(i, j int) bool { + return PandBouwjaar.GetValue(items[i].PandBouwjaar) < PandBouwjaar.GetValue(items[j].PandBouwjaar) + }, + 
"-pand_bouwjaar": func(i, j int) bool { + return PandBouwjaar.GetValue(items[i].PandBouwjaar) > PandBouwjaar.GetValue(items[j].PandBouwjaar) + }, + + "pand_gas_aansluitingen": func(i, j int) bool { + return PandGasAansluitingen.GetValue(items[i].PandGasAansluitingen) < PandGasAansluitingen.GetValue(items[j].PandGasAansluitingen) + }, + "-pand_gas_aansluitingen": func(i, j int) bool { + return PandGasAansluitingen.GetValue(items[i].PandGasAansluitingen) > PandGasAansluitingen.GetValue(items[j].PandGasAansluitingen) + }, + + "gebruiksdoelen": func(i, j int) bool { + return Gebruiksdoelen.GetArrayValue(items[i].Gebruiksdoelen) < Gebruiksdoelen.GetArrayValue(items[j].Gebruiksdoelen) + }, + "-gebruiksdoelen": func(i, j int) bool { + return Gebruiksdoelen.GetArrayValue(items[i].Gebruiksdoelen) > Gebruiksdoelen.GetArrayValue(items[j].Gebruiksdoelen) + }, } return sortFuncs } diff --git a/model_maps.go b/model_maps.go index 3dba615..9c86229 100644 --- a/model_maps.go +++ b/model_maps.go @@ -4,11 +4,15 @@ maps are needed to restore integers back to the actual values. those are generated and stored here. */ -package main -import () +package main type ModelMaps struct { + Pid MappedColumn + Vid MappedColumn + Postcode MappedColumn + Oppervlakte MappedColumn + Woningequivalent MappedColumn WoningType MappedColumn LabelscoreVoorlopig MappedColumn LabelscoreDefinitief MappedColumn @@ -24,33 +28,35 @@ type ModelMaps struct { P6GasAansluitingen2020 MappedColumn P6Gasm32020 MappedColumn P6Kwh2020 MappedColumn + P6TotaalPandoppervlakM2 MappedColumn PandBouwjaar MappedColumn PandGasAansluitingen MappedColumn Gebruiksdoelen MappedColumn } -var modelmaps2 map[string]MappedColumn - -// Column maps. 
-// Store for each non distinct/repeated column - var BitArrays map[string]fieldBitarrayMap +var Pid MappedColumn +var Vid MappedColumn +var Postcode MappedColumn +var Oppervlakte MappedColumn +var Woningequivalent MappedColumn var WoningType MappedColumn var LabelscoreVoorlopig MappedColumn -var Gemeentecode MappedColumn var LabelscoreDefinitief MappedColumn +var Gemeentecode MappedColumn var Gemeentenaam MappedColumn var Buurtcode MappedColumn var Buurtnaam MappedColumn -var Provinciecode MappedColumn var Wijkcode MappedColumn var Wijknaam MappedColumn +var Provinciecode MappedColumn var Provincienaam MappedColumn var PandGasEanAansluitingen MappedColumn var P6GasAansluitingen2020 MappedColumn var P6Gasm32020 MappedColumn var P6Kwh2020 MappedColumn +var P6TotaalPandoppervlakM2 MappedColumn var PandBouwjaar MappedColumn var PandGasAansluitingen MappedColumn var Gebruiksdoelen MappedColumn @@ -61,9 +67,15 @@ func clearBitArrays() { func init() { clearBitArrays() + setUpRepeatedColumns() } func setUpRepeatedColumns() { + Pid = NewReapeatedColumn("pid") + Vid = NewReapeatedColumn("vid") + Postcode = NewReapeatedColumn("postcode") + Oppervlakte = NewReapeatedColumn("oppervlakte") + Woningequivalent = NewReapeatedColumn("woningequivalent") WoningType = NewReapeatedColumn("woning_type") LabelscoreVoorlopig = NewReapeatedColumn("labelscore_voorlopig") LabelscoreDefinitief = NewReapeatedColumn("labelscore_definitief") @@ -79,13 +91,20 @@ func setUpRepeatedColumns() { P6GasAansluitingen2020 = NewReapeatedColumn("p6_gas_aansluitingen_2020") P6Gasm32020 = NewReapeatedColumn("p6_gasm3_2020") P6Kwh2020 = NewReapeatedColumn("p6_kwh_2020") + P6TotaalPandoppervlakM2 = NewReapeatedColumn("p6_totaal_pandoppervlak_m2") PandBouwjaar = NewReapeatedColumn("pand_bouwjaar") PandGasAansluitingen = NewReapeatedColumn("pand_gas_aansluitingen") Gebruiksdoelen = NewReapeatedColumn("gebruiksdoelen") + } func CreateMapstore() ModelMaps { return ModelMaps{ + Pid, + Vid, + Postcode, + 
Oppervlakte, + Woningequivalent, WoningType, LabelscoreVoorlopig, LabelscoreDefinitief, @@ -101,6 +120,7 @@ func CreateMapstore() ModelMaps { P6GasAansluitingen2020, P6Gasm32020, P6Kwh2020, + P6TotaalPandoppervlakM2, PandBouwjaar, PandGasAansluitingen, Gebruiksdoelen, @@ -108,6 +128,12 @@ func CreateMapstore() ModelMaps { } func LoadMapstore(m ModelMaps) { + + Pid = m.Pid + Vid = m.Vid + Postcode = m.Postcode + Oppervlakte = m.Oppervlakte + Woningequivalent = m.Woningequivalent WoningType = m.WoningType LabelscoreVoorlopig = m.LabelscoreVoorlopig LabelscoreDefinitief = m.LabelscoreDefinitief @@ -123,7 +149,9 @@ func LoadMapstore(m ModelMaps) { P6GasAansluitingen2020 = m.P6GasAansluitingen2020 P6Gasm32020 = m.P6Gasm32020 P6Kwh2020 = m.P6Kwh2020 + P6TotaalPandoppervlakM2 = m.P6TotaalPandoppervlakM2 PandBouwjaar = m.PandBouwjaar PandGasAansluitingen = m.PandGasAansluitingen Gebruiksdoelen = m.Gebruiksdoelen + } From 5733650f5ecd2da15c6a44f68bb52f1e7d13d3e3 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 27 Apr 2021 14:12:59 +0200 Subject: [PATCH 50/54] fix: colom test --- custom.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/custom.go b/custom.go index aa432db..3c22578 100644 --- a/custom.go +++ b/custom.go @@ -19,7 +19,7 @@ func reduceWEQ(items Items) map[string]string { result := make(map[string]string) weq := 0 for i := range items { - _weq, err := strconv.ParseInt(items[i].Woningequivalent, 10, 64) + _weq, err := strconv.ParseInt(Woningequivalent.GetValue(items[i].Woningequivalent), 10, 64) if err != nil { panic(err) } From b51468a139bd0d75e5088621beb0da8f31038ab4 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 27 Apr 2021 15:06:33 +0200 Subject: [PATCH 51/54] docs: column.go --- column.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/column.go b/column.go index 87a0add..a523bd8 100644 --- a/column.go +++ b/column.go @@ -10,9 +10,13 @@ import ( type fieldIdxMap map[string]uint32 type IdxFieldMap 
map[uint32]string +/* +MappedColumn store fields in Idx +*/ + type MappedColumn struct { - Idx fieldIdxMap - Field IdxFieldMap + Idx fieldIdxMap // stores field to int values + Field IdxFieldMap // stores int to field values to recover actual fields IdxTracker uint32 } From f8cb26eca1e95d76928cfab2900e3ce4468a7487 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 27 Apr 2021 15:11:08 +0200 Subject: [PATCH 52/54] docs: model.go creation --- README.md | 4 ++-- docker-compose.yml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index aac8537..d4e030c 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ python3 extras/ingestion.py -f movies_subset.tsv -format tsv -dbhost 127.0.0.1: - load data directly from a database (periodic) - document the `create_model.py` questions -- use a remote source for CSV -- use some compression faster to load than gzip +- use a remote data source +- use some more efficient storage method (done) - generate swagger API - Add more tests diff --git a/docker-compose.yml b/docker-compose.yml index 991360a..2d3833b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,6 +4,7 @@ services: build: . 
ports: - 8000:8000 + prometheus: image: prom/prometheus:v2.21.0 ports: From 29daf04b22a329c0642c80876727b942e173bac2 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Tue, 4 May 2021 11:48:25 +0200 Subject: [PATCH 53/54] production version, improved error reporting about bitarray usage --- column.go | 26 +++++++++++++++++--------- geo.go | 4 ++-- http_handlers.go | 5 +++++ model.go | 7 ++++--- model_maps.go | 25 +++++++++++++++++++++++++ operations.go | 5 +++-- storage_operations.go | 2 -- storage_operations_test.go | 18 ++++++++++++++++++ store.go | 8 +++++--- 9 files changed, 79 insertions(+), 21 deletions(-) diff --git a/column.go b/column.go index a523bd8..664e50d 100644 --- a/column.go +++ b/column.go @@ -2,6 +2,7 @@ package main import ( "errors" + "fmt" "github.com/Workiva/go-datastructures/bitarray" "log" "strings" @@ -18,14 +19,15 @@ type MappedColumn struct { Idx fieldIdxMap // stores field to int values Field IdxFieldMap // stores int to field values to recover actual fields IdxTracker uint32 + Name string } type ColumnRegister map[string]MappedColumn -var RepeatedColumns ColumnRegister +var RegisteredColumns ColumnRegister func init() { - RepeatedColumns = make(ColumnRegister) + RegisteredColumns = make(ColumnRegister) } func NewReapeatedColumn(column string) MappedColumn { @@ -33,8 +35,9 @@ func NewReapeatedColumn(column string) MappedColumn { make(fieldIdxMap), make(IdxFieldMap), 0, + column, } - RepeatedColumns[column] = m + RegisteredColumns[column] = m return m } @@ -91,7 +94,7 @@ func (m *MappedColumn) GetIndex(s string) uint32 { return m.Idx[s] } -// SetBitArray WIP +// SetBitArray func SetBitArray(column string, i uint32, label int) { var ba bitarray.BitArray @@ -99,6 +102,7 @@ func SetBitArray(column string, i uint32, label int) { // check if map of bitmaps is present for column var map_ba fieldBitarrayMap + if _, ok = BitArrays[column]; !ok { map_ba := make(fieldBitarrayMap) BitArrays[column] = map_ba @@ -108,6 +112,7 @@ func 
SetBitArray(column string, i uint32, label int) { // check for existing bitarray for i value ba, ok = map_ba[i] + if !ok { ba = bitarray.NewSparseBitArray() map_ba[i] = ba @@ -121,19 +126,22 @@ func GetBitArray(column, value string) (bitarray.BitArray, error) { var ok bool if _, ok = BitArrays[column]; !ok { - return nil, errors.New("no bitarray filter found for column " + column) + return nil, errors.New("no bitarray filter found for " + column) } - bpi, ok := RepeatedColumns[column].Idx[value] + // convert string value to actual indexed int. + i, ok := RegisteredColumns[column].Idx[value] if !ok { - return nil, errors.New("no bitarray filter found for column value WoningType") + msg := fmt.Sprintf("no indexed int value found for %s %s", column, value) + return nil, errors.New(msg) } - ba, ok := BitArrays[column][bpi] + ba, ok := BitArrays[column][i] if !ok { - return nil, errors.New("no bitarray filter found for column idx value WoningType") + msg := fmt.Sprintf("no bitarray found for %s %s %d", column, value, i) + return nil, errors.New(msg) } return ba, nil diff --git a/geo.go b/geo.go index c454488..53e5a6c 100644 --- a/geo.go +++ b/geo.go @@ -115,9 +115,9 @@ func (i Item) GeoIndex(label int) error { center := s2.PointFromLatLng(ll) cell := s2.CellFromPoint(center) - cnode := cellIndexNode{ID: cell.ID(), Label: label} + cnode := cellIndexNode{ID: cell.ID(), Label: i.Label} S2CELLS = append(S2CELLS, cnode) - S2CELLMAP[label] = cell.ID() + S2CELLMAP[i.Label] = cell.ID() // Update index while loading data so queries already work if label%100000 == 0 { diff --git a/http_handlers.go b/http_handlers.go index 32b2b80..affc299 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -275,9 +275,14 @@ func writeCSV(items Items, w http.ResponseWriter) { func loadRest(w http.ResponseWriter, r *http.Request) { storagename, _, retrievefunc, filename := handleInputStorage(r) + start := time.Now() msg := fmt.Sprintf("retrieving with: %s, with filename: %s", storagename, 
filename) fmt.Printf(WarningColorN, msg) itemsAdded, err := retrievefunc(filename) + diff := time.Since(start) + msg = fmt.Sprint("loading time: ", diff) + fmt.Printf(WarningColorN, msg) + if err != nil { log.Printf("could not open %s reason %s", filename, err) w.Write([]byte("500 - could not load data")) diff --git a/model.go b/model.go index 68dc2c7..3da7122 100644 --- a/model.go +++ b/model.go @@ -206,11 +206,12 @@ func (i ItemIn) Shrink(label int) Item { // Store selected columns in seperate map[columnvalue]bitarray // for fast item selection -func (i Item) StoreBitArrayColumns() { +func (i *Item) StoreBitArrayColumns() { + SetBitArray("pid", i.Pid, i.Label) SetBitArray("woning_type", i.WoningType, i.Label) - SetBitArray("labelscore_voorlopig", i.LabelscoreVoorlopig, i.Label) - SetBitArray("labelscore_definitief", i.LabelscoreDefinitief, i.Label) + // SetBitArray("labelscore_voorlopig", i.LabelscoreVoorlopig, i.Label) + // SetBitArray("labelscore_definitief", i.LabelscoreDefinitief, i.Label) SetBitArray("gemeentecode", i.Gemeentecode, i.Label) SetBitArray("buurtcode", i.Buurtcode, i.Label) SetBitArray("wijkcode", i.Wijkcode, i.Label) diff --git a/model_maps.go b/model_maps.go index 9c86229..2762a35 100644 --- a/model_maps.go +++ b/model_maps.go @@ -154,4 +154,29 @@ func LoadMapstore(m ModelMaps) { PandGasAansluitingen = m.PandGasAansluitingen Gebruiksdoelen = m.Gebruiksdoelen + // register the columns + RegisteredColumns[Pid.Name] = Pid + RegisteredColumns[Vid.Name] = Vid + RegisteredColumns[Postcode.Name] = Postcode + RegisteredColumns[Oppervlakte.Name] = Oppervlakte + RegisteredColumns[Woningequivalent.Name] = Woningequivalent + RegisteredColumns[WoningType.Name] = WoningType + RegisteredColumns[LabelscoreVoorlopig.Name] = LabelscoreVoorlopig + RegisteredColumns[LabelscoreDefinitief.Name] = LabelscoreDefinitief + RegisteredColumns[Gemeentecode.Name] = Gemeentecode + RegisteredColumns[Gemeentenaam.Name] = Gemeentenaam + RegisteredColumns[Buurtcode.Name] = 
Buurtcode + RegisteredColumns[Buurtnaam.Name] = Buurtnaam + RegisteredColumns[Wijkcode.Name] = Wijkcode + RegisteredColumns[Wijknaam.Name] = Wijknaam + RegisteredColumns[Provinciecode.Name] = Provinciecode + RegisteredColumns[Provincienaam.Name] = Provincienaam + RegisteredColumns[PandGasEanAansluitingen.Name] = PandGasEanAansluitingen + RegisteredColumns[P6GasAansluitingen2020.Name] = P6GasAansluitingen2020 + RegisteredColumns[P6Gasm32020.Name] = P6Gasm32020 + RegisteredColumns[P6Kwh2020.Name] = P6Kwh2020 + RegisteredColumns[P6TotaalPandoppervlakM2.Name] = P6TotaalPandoppervlakM2 + RegisteredColumns[PandBouwjaar.Name] = PandBouwjaar + RegisteredColumns[PandGasAansluitingen.Name] = PandGasAansluitingen + RegisteredColumns[Gebruiksdoelen.Name] = Gebruiksdoelen } diff --git a/operations.go b/operations.go index b48ccbc..ba74d4d 100644 --- a/operations.go +++ b/operations.go @@ -425,9 +425,9 @@ func filteredEarlyExitSingle(items *Items, column string, operations GroupedOper return results } -// bit Array Filter. +// BitArray Filter. // for columns with not so unique values it makes sense te create bitarrays. -// to do fast bitwise operations. +// to enable fast bitwise selection operations. 
func bitArrayFilter( items *Items, query Query) (Items, error) { @@ -440,6 +440,7 @@ func bitArrayFilter( if len(parameter) == 0 { continue } + if !foundkey { continue } diff --git a/storage_operations.go b/storage_operations.go index a7accf3..e142f56 100644 --- a/storage_operations.go +++ b/storage_operations.go @@ -225,8 +225,6 @@ func loadAtStart(storagename string, filename string, indexed bool) { msg = fmt.Sprint("Loaded in memory amount: ", itemsAdded, " time: ", diff) fmt.Printf(WarningColorN, msg) - ITEMS.FillIndexes() - /* should be added to FillIndexes if indexed { start = time.Now() diff --git a/storage_operations_test.go b/storage_operations_test.go index da3f98f..567f186 100644 --- a/storage_operations_test.go +++ b/storage_operations_test.go @@ -17,7 +17,13 @@ func TestBytesSaving(t *testing.T) { func TestBytes(t *testing.T) { saveAsBytes("testdata/testbytes") + + RegisteredColumns = make(ColumnRegister) ITEMS = Items{} + + clearBitArrays() + clearGeoIndex() + loadAsBytes("testdata/testbytes") if len(ITEMS) != 10 { @@ -31,6 +37,18 @@ func TestBytes(t *testing.T) { t.Error("bytes compressed save / load failed") } + if len(BitArrays) == 0 { + t.Error("bitarrays are not restored") + } + + if len(S2CELLS) == 0 { + t.Error("geoindex is not restored") + } + + if len(RegisteredColumns) == 0 { + t.Error("colom register is not restored") + } + } func TestJson(t *testing.T) { diff --git a/store.go b/store.go index 8797f7b..1d8ef78 100644 --- a/store.go +++ b/store.go @@ -3,6 +3,7 @@ package main import ( "fmt" "log" + "runtime" "sync" "time" ) @@ -35,7 +36,6 @@ func ItemChanWorker(itemChan ItemsChannel) { smallItem := itm.Shrink(label) smallItem.StoreBitArrayColumns() ITEMS = append(ITEMS, &smallItem) - // ITEMS[label] = &smallItem if ITEMS[label] != &smallItem { log.Fatal("storing item index off") } @@ -47,7 +47,7 @@ func ItemChanWorker(itemChan ItemsChannel) { } } -func (items Items) FillIndexes() { +func (items *Items) FillIndexes() { start := time.Now() @@ 
-57,7 +57,7 @@ func (items Items) FillIndexes() { clearGeoIndex() clearBitArrays() - for i := range items { + for i := range *items { ITEMS[i].StoreBitArrayColumns() ITEMS[i].GeoIndex(ITEMS[i].Label) } @@ -65,4 +65,6 @@ func (items Items) FillIndexes() { diff := time.Since(start) msg := fmt.Sprint("Index set time: ", diff) fmt.Printf(WarningColorN, msg) + // run garbadge collection + runtime.GC() } From bf9d65eea060c3fda497d7ae2a0d2423e4942103 Mon Sep 17 00:00:00 2001 From: Stepahn Preeker Date: Mon, 10 May 2021 16:37:54 +0200 Subject: [PATCH 54/54] added huisnummer / toevoegingen many small fixes to code generation --- custom.go | 28 ++ extras/config.yaml | 7 +- extras/create_model.py | 6 + extras/templates/model.template.jinja2 | 2 +- extras/templates/modelmap.template.jinja2 | 2 + geo.go | 6 +- http_handlers.go | 15 +- http_handlers_test.go | 4 +- main.go | 8 +- model.go | 253 +++++++++++++++--- model_maps.go | 30 +++ store.go | 2 + testdata/README.md | 3 + ...aselectie_vbo_energie_20210217.head.csv.gz | Bin 717 -> 0 bytes ...dataselectie_vbo_energie_20210505.head.csv | 11 + 15 files changed, 329 insertions(+), 48 deletions(-) create mode 100644 testdata/README.md delete mode 100644 testdata/dataselectie_vbo_energie_20210217.head.csv.gz create mode 100644 testdata/dataselectie_vbo_energie_20210505.head.csv diff --git a/custom.go b/custom.go index 3c22578..ad14371 100644 --- a/custom.go +++ b/custom.go @@ -1,7 +1,9 @@ package main import ( + "fmt" "strconv" + "strings" ) type registerCustomGroupByFunc map[string]func(*Item, ItemsGroupedBy) @@ -28,3 +30,29 @@ func reduceWEQ(items Items) map[string]string { result["woningenquivalent"] = strconv.Itoa(weq) return result } + +func GettersToevoegingen(i *Item) string { + return Postcode.GetValue(i.Postcode) + " " + Huisnummer.GetValue(i.Huisnummer) +} + +// getter Gebruiksdoelen +func GroupByGettersGebruiksdoelen(item *Item, grouping ItemsGroupedBy) { + + for i := range item.Gebruiksdoelen { + groupkey := 
Gebruiksdoelen.GetValue(item.Gebruiksdoelen[i]) + grouping[groupkey] = append(grouping[groupkey], item) + } +} + +func GetAdres(i *Item) string { + adres := fmt.Sprintf("%s %s %s %s %s %s", + Straat.GetValue(i.Straat), + Huisnummer.GetValue(i.Huisnummer), + Huisletter.GetValue(i.Huisletter), + Huisnummertoevoeging.GetValue(i.Huisnummertoevoeging), + Postcode.GetValue(i.Postcode), + Gemeentenaam.GetValue(i.Gemeentenaam)) + + adres = strings.ReplaceAll(adres, " ", " ") + return adres +} diff --git a/extras/config.yaml b/extras/config.yaml index 5acd74b..6c4b34a 100644 --- a/extras/config.yaml +++ b/extras/config.yaml @@ -2,10 +2,14 @@ model: adres: u buurtcode: b buurtnaam: r + energieklasse: b gebruiksdoelen: r gemeentecode: b gemeentenaam: r group_id_2020: u + huisletter: r + huisnummer: r + huisnummertoevoeging: r index: '0' labelscore_definitief: b labelscore_voorlopig: b @@ -20,9 +24,10 @@ model: pand_gas_ean_aansluitingen: r pid: b point: g - postcode: r + postcode: b provinciecode: b provincienaam: r + straat: r vid: r wijkcode: b wijknaam: r diff --git a/extras/create_model.py b/extras/create_model.py index 891a915..6326805 100644 --- a/extras/create_model.py +++ b/extras/create_model.py @@ -179,6 +179,8 @@ def gocamelCase(string): repeatColumnNames = [] loadRepeatColumnNames = [] mappedColumns = [] +registerColumns = [] + for columnName, c2 in zip(repeated, repeated_org): initRow = f'\t {columnName} = NewReapeatedColumn("{c2}")\n' @@ -190,6 +192,9 @@ def gocamelCase(string): loadRow = f"\t {columnName} = m.{columnName} \n" loadRepeatColumnNames.append(loadRow) + registerColumnsRow = f"\t RegisteredColumns[{columnName}.Name] = {columnName} \n" + registerColumns.append(registerColumnsRow) + mappedColumnsRow = f"\t {columnName} MappedColumn \n" mappedColumns.append(mappedColumnsRow) @@ -362,6 +367,7 @@ def gocamelCase(string): initRepeatColumns=''.join(initRepeatColumns), repeatColumnNames=''.join(repeatColumnNames), 
loadRepeatColumnNames=''.join(loadRepeatColumnNames), + registerColumns=''.join(registerColumns), mappedColumns=''.join(mappedColumns), shrinkVars=''.join(shrinkVars), ) diff --git a/extras/templates/model.template.jinja2 b/extras/templates/model.template.jinja2 index 7beb896..b34adb3 100644 --- a/extras/templates/model.template.jinja2 +++ b/extras/templates/model.template.jinja2 @@ -85,7 +85,7 @@ func (i ItemIn) Shrink(label int) Item { // Store selected columns in seperate map[columnvalue]bitarray // for fast item selection -func (i Item) StoreBitArrayColumns() { +func (i *Item) StoreBitArrayColumns() { {{ bitArrayStores }} } diff --git a/extras/templates/modelmap.template.jinja2 b/extras/templates/modelmap.template.jinja2 index c5bc4f5..17b8b29 100644 --- a/extras/templates/modelmap.template.jinja2 +++ b/extras/templates/modelmap.template.jinja2 @@ -44,4 +44,6 @@ func LoadMapstore(m ModelMaps) { {{loadRepeatColumnNames}} +{{registerColumns}} + } diff --git a/geo.go b/geo.go index 53e5a6c..74c9750 100644 --- a/geo.go +++ b/geo.go @@ -120,9 +120,9 @@ func (i Item) GeoIndex(label int) error { S2CELLMAP[i.Label] = cell.ID() // Update index while loading data so queries already work - if label%100000 == 0 { - S2CELLS.Sort() - } + //if label%1000000 == 0 { + // S2CELLS.Sort() + //} return nil diff --git a/http_handlers.go b/http_handlers.go index affc299..3fbe35a 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -413,6 +413,10 @@ func corsEnabled(h http.Handler) http.Handler { w.Header().Set("Access-Control-Allow-Headers", "Page, Page-Size, Total-Pages, query, Total-Items, Query-Duration, Content-Type, X-CSRF-Token, Authorization") return } else { + // make sure items are not being modified during request + // otherwise wait.. 
+ lock.RLock() + defer lock.RUnlock() h.ServeHTTP(w, r) } }) @@ -420,21 +424,20 @@ func corsEnabled(h http.Handler) http.Handler { } func passThrough(h http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // make sure items are not being modified during request + // otherwise wait.. + lock.RLock() + defer lock.RUnlock() h.ServeHTTP(w, r) - }) } func MIDDLEWARE(cors bool) func(http.Handler) http.Handler { + if cors { return corsEnabled } - // make sure items are not being modified during request - // otherwise wait.. - lock.RLock() - defer lock.RUnlock() - return passThrough } diff --git a/http_handlers_test.go b/http_handlers_test.go index f4800c5..8a2ed67 100644 --- a/http_handlers_test.go +++ b/http_handlers_test.go @@ -35,7 +35,7 @@ func TestMain(m *testing.M) { defaultSettings() SETTINGS.Set( - "csv", "./testdata/dataselectie_vbo_energie_20210217.head.csv.gz", + "csv", "./testdata/dataselectie_vbo_energie_20210505.head.csv", "test dataset") SETTINGS.Set("channelwait", "0.01s", "timeout for channel loading") @@ -72,7 +72,7 @@ func TestBasicHandlers(t *testing.T) { tests := []testCase{ testCase{"/list/?search=1", "10"}, - testCase{"/typeahead/gemeentecode/?search=1", "2"}, + testCase{"/typeahead/huisnummer/?search=1", "3"}, testCase{"/typeahead/pid/?search=1", "2"}, testCase{"/help/", ""}, } diff --git a/main.go b/main.go index 041a6e3..9b4dba3 100644 --- a/main.go +++ b/main.go @@ -66,7 +66,7 @@ func defaultSettings() { SETTINGS.Set("SHAREDSECRET", "", "jwt shared secret") SETTINGS.Set("JWTENABLED", "y", "JWT enabled") - SETTINGS.Set("CORS", "n", "CORS enabled") + SETTINGS.Set("CORS", "y", "CORS enabled") SETTINGS.Set("csv", "", "load a gzipped csv file on starup") SETTINGS.Set("null-delimiter", "\\N", "null delimiter") @@ -150,14 +150,14 @@ func setupHandler() http.Handler { mux.HandleFunc("/list/", listRest) mux.HandleFunc("/help/", helpRest) + mux.Handle("/", http.FileServer(http.Dir("./files/www"))) + 
mux.Handle("/dsm-search", http.FileServer(http.Dir("./files/www"))) + if SETTINGS.Get("mgmt") == "y" { mux.HandleFunc("/mgmt/add/", addRest) mux.HandleFunc("/mgmt/rm/", rmRest) mux.HandleFunc("/mgmt/save/", saveRest) mux.HandleFunc("/mgmt/load/", loadRest) - - mux.Handle("/", http.FileServer(http.Dir("./files/www"))) - mux.Handle("/dsm-search", http.FileServer(http.Dir("./files/www"))) } if SETTINGS.Get("prometheus-monitoring") == "y" { diff --git a/model.go b/model.go index 3da7122..8c4adba 100644 --- a/model.go +++ b/model.go @@ -46,13 +46,17 @@ type ItemIn struct { Pid string `json:"pid"` Vid string `json:"vid"` Numid string `json:"numid"` + Straat string `json:"straat"` Postcode string `json:"postcode"` + Huisnummer string `json:"huisnummer"` + Huisletter string `json:"huisletter"` + Huisnummertoevoeging string `json:"huisnummertoevoeging"` Oppervlakte string `json:"oppervlakte"` Woningequivalent string `json:"woningequivalent"` - Adres string `json:"adres"` WoningType string `json:"woning_type"` LabelscoreVoorlopig string `json:"labelscore_voorlopig"` LabelscoreDefinitief string `json:"labelscore_definitief"` + Energieklasse string `json:"energieklasse"` Gemeentecode string `json:"gemeentecode"` Gemeentenaam string `json:"gemeentenaam"` Buurtcode string `json:"buurtcode"` @@ -77,13 +81,18 @@ type ItemOut struct { Pid string `json:"pid"` Vid string `json:"vid"` Numid string `json:"numid"` + Straat string `json:"straat"` Postcode string `json:"postcode"` + Huisnummer string `json:"huisnummer"` + Huisletter string `json:"huisletter"` + Huisnummertoevoeging string `json:"huisnummertoevoeging"` + Adres string `json:"adres"` // should be removed soon Oppervlakte string `json:"oppervlakte"` Woningequivalent string `json:"woningequivalent"` - Adres string `json:"adres"` WoningType string `json:"woning_type"` LabelscoreVoorlopig string `json:"labelscore_voorlopig"` LabelscoreDefinitief string `json:"labelscore_definitief"` + Energieklasse string `json:"energieklasse"` 
Gemeentecode string `json:"gemeentecode"` Gemeentenaam string `json:"gemeentenaam"` Buurtcode string `json:"buurtcode"` @@ -109,13 +118,17 @@ type Item struct { Pid uint32 Vid uint32 Numid string + Straat uint32 Postcode uint32 + Huisnummer uint32 + Huisletter uint32 + Huisnummertoevoeging uint32 Oppervlakte uint32 Woningequivalent uint32 - Adres string WoningType uint32 LabelscoreVoorlopig uint32 LabelscoreDefinitief uint32 + Energieklasse uint32 Gemeentecode uint32 Gemeentenaam uint32 Buurtcode uint32 @@ -145,12 +158,17 @@ func (i ItemIn) Shrink(label int) Item { Pid.Store(i.Pid) Vid.Store(i.Vid) + Straat.Store(i.Straat) Postcode.Store(i.Postcode) + Huisnummer.Store(i.Huisnummer) + Huisletter.Store(i.Huisletter) + Huisnummertoevoeging.Store(i.Huisnummertoevoeging) Oppervlakte.Store(i.Oppervlakte) Woningequivalent.Store(i.Woningequivalent) WoningType.Store(i.WoningType) LabelscoreVoorlopig.Store(i.LabelscoreVoorlopig) LabelscoreDefinitief.Store(i.LabelscoreDefinitief) + Energieklasse.Store(i.Energieklasse) Gemeentecode.Store(i.Gemeentecode) Gemeentenaam.Store(i.Gemeentenaam) Buurtcode.Store(i.Buurtcode) @@ -176,13 +194,17 @@ func (i ItemIn) Shrink(label int) Item { Pid.GetIndex(i.Pid), Vid.GetIndex(i.Vid), i.Numid, + Straat.GetIndex(i.Straat), Postcode.GetIndex(i.Postcode), + Huisnummer.GetIndex(i.Huisnummer), + Huisletter.GetIndex(i.Huisletter), + Huisnummertoevoeging.GetIndex(i.Huisnummertoevoeging), Oppervlakte.GetIndex(i.Oppervlakte), Woningequivalent.GetIndex(i.Woningequivalent), - i.Adres, WoningType.GetIndex(i.WoningType), LabelscoreVoorlopig.GetIndex(i.LabelscoreVoorlopig), LabelscoreDefinitief.GetIndex(i.LabelscoreDefinitief), + Energieklasse.GetIndex(i.Energieklasse), Gemeentecode.GetIndex(i.Gemeentecode), Gemeentenaam.GetIndex(i.Gemeentenaam), Buurtcode.GetIndex(i.Buurtcode), @@ -209,9 +231,11 @@ func (i ItemIn) Shrink(label int) Item { func (i *Item) StoreBitArrayColumns() { SetBitArray("pid", i.Pid, i.Label) + SetBitArray("postcode", i.Postcode, 
i.Label) SetBitArray("woning_type", i.WoningType, i.Label) - // SetBitArray("labelscore_voorlopig", i.LabelscoreVoorlopig, i.Label) - // SetBitArray("labelscore_definitief", i.LabelscoreDefinitief, i.Label) + SetBitArray("labelscore_voorlopig", i.LabelscoreVoorlopig, i.Label) + SetBitArray("labelscore_definitief", i.LabelscoreDefinitief, i.Label) + SetBitArray("energieklasse", i.Energieklasse, i.Label) SetBitArray("gemeentecode", i.Gemeentecode, i.Label) SetBitArray("buurtcode", i.Buurtcode, i.Label) SetBitArray("wijkcode", i.Wijkcode, i.Label) @@ -225,13 +249,18 @@ func (i Item) Serialize() ItemOut { Pid.GetValue(i.Pid), Vid.GetValue(i.Vid), i.Numid, + Straat.GetValue(i.Straat), Postcode.GetValue(i.Postcode), + Huisnummer.GetValue(i.Huisnummer), + Huisletter.GetValue(i.Huisletter), + Huisnummertoevoeging.GetValue(i.Huisnummertoevoeging), + GetAdres(&i), Oppervlakte.GetValue(i.Oppervlakte), Woningequivalent.GetValue(i.Woningequivalent), - i.Adres, WoningType.GetValue(i.WoningType), LabelscoreVoorlopig.GetValue(i.LabelscoreVoorlopig), LabelscoreDefinitief.GetValue(i.LabelscoreDefinitief), + Energieklasse.GetValue(i.Energieklasse), Gemeentecode.GetValue(i.Gemeentecode), Gemeentenaam.GetValue(i.Gemeentenaam), Buurtcode.GetValue(i.Buurtcode), @@ -259,13 +288,17 @@ func (i ItemIn) Columns() []string { "pid", "vid", "numid", + "straat", "postcode", + "huisnummer", + "huisletter", + "huisnummertoevoeging", "oppervlakte", "woningequivalent", - "adres", "woning_type", "labelscore_voorlopig", "labelscore_definitief", + "energieklasse", "gemeentecode", "gemeentenaam", "buurtcode", @@ -293,13 +326,17 @@ func (i ItemOut) Columns() []string { "pid", "vid", "numid", + "straat", "postcode", + "huisnummer", + "huisletter", + "huisnummertoevoeging", "oppervlakte", "woningequivalent", - "adres", "woning_type", "labelscore_voorlopig", "labelscore_definitief", + "energieklasse", "gemeentecode", "gemeentenaam", "buurtcode", @@ -328,13 +365,17 @@ func (i Item) Row() []string { 
Pid.GetValue(i.Pid), Vid.GetValue(i.Vid), i.Numid, + Straat.GetValue(i.Straat), Postcode.GetValue(i.Postcode), + Huisnummer.GetValue(i.Huisnummer), + Huisletter.GetValue(i.Huisletter), + Huisnummertoevoeging.GetValue(i.Huisnummertoevoeging), Oppervlakte.GetValue(i.Oppervlakte), Woningequivalent.GetValue(i.Woningequivalent), - i.Adres, WoningType.GetValue(i.WoningType), LabelscoreVoorlopig.GetValue(i.LabelscoreVoorlopig), LabelscoreDefinitief.GetValue(i.LabelscoreDefinitief), + Energieklasse.GetValue(i.Energieklasse), Gemeentecode.GetValue(i.Gemeentecode), Gemeentenaam.GetValue(i.Gemeentenaam), Buurtcode.GetValue(i.Buurtcode), @@ -424,6 +465,26 @@ func GettersNumid(i *Item) string { return i.Numid } +// contain filter Straat +func FilterStraatContains(i *Item, s string) bool { + return strings.Contains(Straat.GetValue(i.Straat), s) +} + +// startswith filter Straat +func FilterStraatStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Straat.GetValue(i.Straat), s) +} + +// match filters Straat +func FilterStraatMatch(i *Item, s string) bool { + return Straat.GetValue(i.Straat) == s +} + +// getter Straat +func GettersStraat(i *Item) string { + return Straat.GetValue(i.Straat) +} + // contain filter Postcode func FilterPostcodeContains(i *Item, s string) bool { return strings.Contains(Postcode.GetValue(i.Postcode), s) @@ -444,6 +505,66 @@ func GettersPostcode(i *Item) string { return Postcode.GetValue(i.Postcode) } +// contain filter Huisnummer +func FilterHuisnummerContains(i *Item, s string) bool { + return strings.Contains(Huisnummer.GetValue(i.Huisnummer), s) +} + +// startswith filter Huisnummer +func FilterHuisnummerStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Huisnummer.GetValue(i.Huisnummer), s) +} + +// match filters Huisnummer +func FilterHuisnummerMatch(i *Item, s string) bool { + return Huisnummer.GetValue(i.Huisnummer) == s +} + +// getter Huisnummer +func GettersHuisnummer(i *Item) string { + return 
Huisnummer.GetValue(i.Huisnummer) +} + +// contain filter Huisletter +func FilterHuisletterContains(i *Item, s string) bool { + return strings.Contains(Huisletter.GetValue(i.Huisletter), s) +} + +// startswith filter Huisletter +func FilterHuisletterStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Huisletter.GetValue(i.Huisletter), s) +} + +// match filters Huisletter +func FilterHuisletterMatch(i *Item, s string) bool { + return Huisletter.GetValue(i.Huisletter) == s +} + +// getter Huisletter +func GettersHuisletter(i *Item) string { + return Huisletter.GetValue(i.Huisletter) +} + +// contain filter Huisnummertoevoeging +func FilterHuisnummertoevoegingContains(i *Item, s string) bool { + return strings.Contains(Huisnummertoevoeging.GetValue(i.Huisnummertoevoeging), s) +} + +// startswith filter Huisnummertoevoeging +func FilterHuisnummertoevoegingStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Huisnummertoevoeging.GetValue(i.Huisnummertoevoeging), s) +} + +// match filters Huisnummertoevoeging +func FilterHuisnummertoevoegingMatch(i *Item, s string) bool { + return Huisnummertoevoeging.GetValue(i.Huisnummertoevoeging) == s +} + +// getter Huisnummertoevoeging +func GettersHuisnummertoevoeging(i *Item) string { + return Huisnummertoevoeging.GetValue(i.Huisnummertoevoeging) +} + // contain filter Oppervlakte func FilterOppervlakteContains(i *Item, s string) bool { return strings.Contains(Oppervlakte.GetValue(i.Oppervlakte), s) @@ -484,6 +605,7 @@ func GettersWoningequivalent(i *Item) string { return Woningequivalent.GetValue(i.Woningequivalent) } +/* // contain filter Adres func FilterAdresContains(i *Item, s string) bool { return strings.Contains(i.Adres, s) @@ -503,6 +625,7 @@ func FilterAdresMatch(i *Item, s string) bool { func GettersAdres(i *Item) string { return i.Adres } +*/ // contain filter WoningType func FilterWoningTypeContains(i *Item, s string) bool { @@ -564,6 +687,26 @@ func GettersLabelscoreDefinitief(i *Item) string 
{ return LabelscoreDefinitief.GetValue(i.LabelscoreDefinitief) } +// contain filter Energieklasse +func FilterEnergieklasseContains(i *Item, s string) bool { + return strings.Contains(Energieklasse.GetValue(i.Energieklasse), s) +} + +// startswith filter Energieklasse +func FilterEnergieklasseStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Energieklasse.GetValue(i.Energieklasse), s) +} + +// match filters Energieklasse +func FilterEnergieklasseMatch(i *Item, s string) bool { + return Energieklasse.GetValue(i.Energieklasse) == s +} + +// getter Energieklasse +func GettersEnergieklasse(i *Item) string { + return Energieklasse.GetValue(i.Energieklasse) +} + // contain filter Gemeentecode func FilterGemeentecodeContains(i *Item, s string) bool { return strings.Contains(Gemeentecode.GetValue(i.Gemeentecode), s) @@ -943,15 +1086,6 @@ func GettersGebruiksdoelen(i *Item) string { return Gebruiksdoelen.GetArrayValue(i.Gebruiksdoelen) } -// getter Gebruiksdoelen -func GroupByGettersGebruiksdoelen(item *Item, grouping ItemsGroupedBy) { - - for i := range item.Gebruiksdoelen { - groupkey := Gebruiksdoelen.GetValue(item.Gebruiksdoelen[i]) - grouping[groupkey] = append(grouping[groupkey], item) - } -} - /* // contain filters func FilterEkeyContains(i *Item, s string) bool { @@ -1025,7 +1159,7 @@ func init() { // example RegisterFuncMap["search"] = FilterEkeyStartsWith //RegisterFuncMap["value"] = 'EDITYOURSELF' - RegisterGetters["value"] = GettersAdres + RegisterGetters["value"] = GettersGemeentecode // register filters @@ -1050,6 +1184,13 @@ func init() { RegisterGetters["numid"] = GettersNumid RegisterGroupBy["numid"] = GettersNumid + //register filters for Straat + RegisterFuncMap["match-straat"] = FilterStraatMatch + RegisterFuncMap["contains-straat"] = FilterStraatContains + RegisterFuncMap["startswith-straat"] = FilterStraatStartsWith + RegisterGetters["straat"] = GettersStraat + RegisterGroupBy["straat"] = GettersStraat + //register filters for Postcode 
RegisterFuncMap["match-postcode"] = FilterPostcodeMatch RegisterFuncMap["contains-postcode"] = FilterPostcodeContains @@ -1057,6 +1198,27 @@ func init() { RegisterGetters["postcode"] = GettersPostcode RegisterGroupBy["postcode"] = GettersPostcode + //register filters for Huisnummer + RegisterFuncMap["match-huisnummer"] = FilterHuisnummerMatch + RegisterFuncMap["contains-huisnummer"] = FilterHuisnummerContains + RegisterFuncMap["startswith-huisnummer"] = FilterHuisnummerStartsWith + RegisterGetters["huisnummer"] = GettersHuisnummer + RegisterGroupBy["huisnummer"] = GettersHuisnummer + + //register filters for Huisletter + RegisterFuncMap["match-huisletter"] = FilterHuisletterMatch + RegisterFuncMap["contains-huisletter"] = FilterHuisletterContains + RegisterFuncMap["startswith-huisletter"] = FilterHuisletterStartsWith + RegisterGetters["huisletter"] = GettersHuisletter + RegisterGroupBy["huisletter"] = GettersHuisletter + + //register filters for Huisnummertoevoeging + RegisterFuncMap["match-huisnummertoevoeging"] = FilterHuisnummertoevoegingMatch + RegisterFuncMap["contains-huisnummertoevoeging"] = FilterHuisnummertoevoegingContains + RegisterFuncMap["startswith-huisnummertoevoeging"] = FilterHuisnummertoevoegingStartsWith + RegisterGetters["huisnummertoevoeging"] = GettersHuisnummertoevoeging + RegisterGroupBy["huisnummertoevoeging"] = GettersHuisnummertoevoeging + //register filters for Oppervlakte RegisterFuncMap["match-oppervlakte"] = FilterOppervlakteMatch RegisterFuncMap["contains-oppervlakte"] = FilterOppervlakteContains @@ -1070,14 +1232,6 @@ func init() { RegisterFuncMap["startswith-woningequivalent"] = FilterWoningequivalentStartsWith RegisterGetters["woningequivalent"] = GettersWoningequivalent RegisterGroupBy["woningequivalent"] = GettersWoningequivalent - - //register filters for Adres - RegisterFuncMap["match-adres"] = FilterAdresMatch - RegisterFuncMap["contains-adres"] = FilterAdresContains - RegisterFuncMap["startswith-adres"] = 
FilterAdresStartsWith - RegisterGetters["adres"] = GettersAdres - RegisterGroupBy["adres"] = GettersAdres - //register filters for WoningType RegisterFuncMap["match-woning_type"] = FilterWoningTypeMatch RegisterFuncMap["contains-woning_type"] = FilterWoningTypeContains @@ -1099,6 +1253,13 @@ func init() { RegisterGetters["labelscore_definitief"] = GettersLabelscoreDefinitief RegisterGroupBy["labelscore_definitief"] = GettersLabelscoreDefinitief + //register filters for Energieklasse + RegisterFuncMap["match-energieklasse"] = FilterEnergieklasseMatch + RegisterFuncMap["contains-energieklasse"] = FilterEnergieklasseContains + RegisterFuncMap["startswith-energieklasse"] = FilterEnergieklasseStartsWith + RegisterGetters["energieklasse"] = GettersEnergieklasse + RegisterGroupBy["energieklasse"] = GettersEnergieklasse + //register filters for Gemeentecode RegisterFuncMap["match-gemeentecode"] = FilterGemeentecodeMatch RegisterFuncMap["contains-gemeentecode"] = FilterGemeentecodeContains @@ -1225,6 +1386,8 @@ func init() { RegisterGetters["gebruiksdoelen"] = GettersGebruiksdoelen RegisterGroupBy["gebruiksdoelen"] = GettersGebruiksdoelen + RegisterGroupBy["postcodehuisnummer"] = GettersToevoegingen + validateRegisters() /* @@ -1259,6 +1422,9 @@ func createSort(items Items) sortLookup { "numid": func(i, j int) bool { return items[i].Numid < items[j].Numid }, "-numid": func(i, j int) bool { return items[i].Numid > items[j].Numid }, + "straat": func(i, j int) bool { return Straat.GetValue(items[i].Straat) < Straat.GetValue(items[j].Straat) }, + "-straat": func(i, j int) bool { return Straat.GetValue(items[i].Straat) > Straat.GetValue(items[j].Straat) }, + "postcode": func(i, j int) bool { return Postcode.GetValue(items[i].Postcode) < Postcode.GetValue(items[j].Postcode) }, @@ -1266,6 +1432,27 @@ func createSort(items Items) sortLookup { return Postcode.GetValue(items[i].Postcode) > Postcode.GetValue(items[j].Postcode) }, + "huisnummer": func(i, j int) bool { + return 
Huisnummer.GetValue(items[i].Huisnummer) < Huisnummer.GetValue(items[j].Huisnummer) + }, + "-huisnummer": func(i, j int) bool { + return Huisnummer.GetValue(items[i].Huisnummer) > Huisnummer.GetValue(items[j].Huisnummer) + }, + + "huisletter": func(i, j int) bool { + return Huisletter.GetValue(items[i].Huisletter) < Huisletter.GetValue(items[j].Huisletter) + }, + "-huisletter": func(i, j int) bool { + return Huisletter.GetValue(items[i].Huisletter) > Huisletter.GetValue(items[j].Huisletter) + }, + + "huisnummertoevoeging": func(i, j int) bool { + return Huisnummertoevoeging.GetValue(items[i].Huisnummertoevoeging) < Huisnummertoevoeging.GetValue(items[j].Huisnummertoevoeging) + }, + "-huisnummertoevoeging": func(i, j int) bool { + return Huisnummertoevoeging.GetValue(items[i].Huisnummertoevoeging) > Huisnummertoevoeging.GetValue(items[j].Huisnummertoevoeging) + }, + "oppervlakte": func(i, j int) bool { return Oppervlakte.GetValue(items[i].Oppervlakte) < Oppervlakte.GetValue(items[j].Oppervlakte) }, @@ -1280,9 +1467,6 @@ func createSort(items Items) sortLookup { return Woningequivalent.GetValue(items[i].Woningequivalent) > Woningequivalent.GetValue(items[j].Woningequivalent) }, - "adres": func(i, j int) bool { return items[i].Adres < items[j].Adres }, - "-adres": func(i, j int) bool { return items[i].Adres > items[j].Adres }, - "woning_type": func(i, j int) bool { return WoningType.GetValue(items[i].WoningType) < WoningType.GetValue(items[j].WoningType) }, @@ -1304,6 +1488,13 @@ func createSort(items Items) sortLookup { return LabelscoreDefinitief.GetValue(items[i].LabelscoreDefinitief) > LabelscoreDefinitief.GetValue(items[j].LabelscoreDefinitief) }, + "energieklasse": func(i, j int) bool { + return Energieklasse.GetValue(items[i].Energieklasse) < Energieklasse.GetValue(items[j].Energieklasse) + }, + "-energieklasse": func(i, j int) bool { + return Energieklasse.GetValue(items[i].Energieklasse) > Energieklasse.GetValue(items[j].Energieklasse) + }, + "gemeentecode": 
func(i, j int) bool { return Gemeentecode.GetValue(items[i].Gemeentecode) < Gemeentecode.GetValue(items[j].Gemeentecode) }, diff --git a/model_maps.go b/model_maps.go index 2762a35..4097cc7 100644 --- a/model_maps.go +++ b/model_maps.go @@ -10,12 +10,17 @@ package main type ModelMaps struct { Pid MappedColumn Vid MappedColumn + Straat MappedColumn Postcode MappedColumn + Huisnummer MappedColumn + Huisletter MappedColumn + Huisnummertoevoeging MappedColumn Oppervlakte MappedColumn Woningequivalent MappedColumn WoningType MappedColumn LabelscoreVoorlopig MappedColumn LabelscoreDefinitief MappedColumn + Energieklasse MappedColumn Gemeentecode MappedColumn Gemeentenaam MappedColumn Buurtcode MappedColumn @@ -38,12 +43,17 @@ var BitArrays map[string]fieldBitarrayMap var Pid MappedColumn var Vid MappedColumn +var Straat MappedColumn var Postcode MappedColumn +var Huisnummer MappedColumn +var Huisletter MappedColumn +var Huisnummertoevoeging MappedColumn var Oppervlakte MappedColumn var Woningequivalent MappedColumn var WoningType MappedColumn var LabelscoreVoorlopig MappedColumn var LabelscoreDefinitief MappedColumn +var Energieklasse MappedColumn var Gemeentecode MappedColumn var Gemeentenaam MappedColumn var Buurtcode MappedColumn @@ -73,12 +83,17 @@ func init() { func setUpRepeatedColumns() { Pid = NewReapeatedColumn("pid") Vid = NewReapeatedColumn("vid") + Straat = NewReapeatedColumn("straat") Postcode = NewReapeatedColumn("postcode") + Huisnummer = NewReapeatedColumn("huisnummer") + Huisletter = NewReapeatedColumn("huisletter") + Huisnummertoevoeging = NewReapeatedColumn("huisnummertoevoeging") Oppervlakte = NewReapeatedColumn("oppervlakte") Woningequivalent = NewReapeatedColumn("woningequivalent") WoningType = NewReapeatedColumn("woning_type") LabelscoreVoorlopig = NewReapeatedColumn("labelscore_voorlopig") LabelscoreDefinitief = NewReapeatedColumn("labelscore_definitief") + Energieklasse = NewReapeatedColumn("energieklasse") Gemeentecode = 
NewReapeatedColumn("gemeentecode") Gemeentenaam = NewReapeatedColumn("gemeentenaam") Buurtcode = NewReapeatedColumn("buurtcode") @@ -102,12 +117,17 @@ func CreateMapstore() ModelMaps { return ModelMaps{ Pid, Vid, + Straat, Postcode, + Huisnummer, + Huisletter, + Huisnummertoevoeging, Oppervlakte, Woningequivalent, WoningType, LabelscoreVoorlopig, LabelscoreDefinitief, + Energieklasse, Gemeentecode, Gemeentenaam, Buurtcode, @@ -131,12 +151,17 @@ func LoadMapstore(m ModelMaps) { Pid = m.Pid Vid = m.Vid + Straat = m.Straat Postcode = m.Postcode + Huisnummer = m.Huisnummer + Huisletter = m.Huisletter + Huisnummertoevoeging = m.Huisnummertoevoeging Oppervlakte = m.Oppervlakte Woningequivalent = m.Woningequivalent WoningType = m.WoningType LabelscoreVoorlopig = m.LabelscoreVoorlopig LabelscoreDefinitief = m.LabelscoreDefinitief + Energieklasse = m.Energieklasse Gemeentecode = m.Gemeentecode Gemeentenaam = m.Gemeentenaam Buurtcode = m.Buurtcode @@ -157,12 +182,17 @@ func LoadMapstore(m ModelMaps) { // register the columns RegisteredColumns[Pid.Name] = Pid RegisteredColumns[Vid.Name] = Vid + RegisteredColumns[Straat.Name] = Straat RegisteredColumns[Postcode.Name] = Postcode + RegisteredColumns[Huisnummer.Name] = Huisnummer + RegisteredColumns[Huisletter.Name] = Huisletter + RegisteredColumns[Huisnummertoevoeging.Name] = Huisnummertoevoeging RegisteredColumns[Oppervlakte.Name] = Oppervlakte RegisteredColumns[Woningequivalent.Name] = Woningequivalent RegisteredColumns[WoningType.Name] = WoningType RegisteredColumns[LabelscoreVoorlopig.Name] = LabelscoreVoorlopig RegisteredColumns[LabelscoreDefinitief.Name] = LabelscoreDefinitief + RegisteredColumns[Energieklasse.Name] = Energieklasse RegisteredColumns[Gemeentecode.Name] = Gemeentecode RegisteredColumns[Gemeentenaam.Name] = Gemeentenaam RegisteredColumns[Buurtcode.Name] = Buurtcode diff --git a/store.go b/store.go index 1d8ef78..ce31e36 100644 --- a/store.go +++ b/store.go @@ -62,6 +62,8 @@ func (items *Items) FillIndexes() { 
ITEMS[i].GeoIndex(ITEMS[i].Label) } + S2CELLS.Sort() + diff := time.Since(start) msg := fmt.Sprint("Index set time: ", diff) fmt.Printf(WarningColorN, msg) diff --git a/testdata/README.md b/testdata/README.md new file mode 100644 index 0000000..19ed6b1 --- /dev/null +++ b/testdata/README.md @@ -0,0 +1,3 @@ +To create new testdata: + +- curl -O 'http://127.0.0.1:8000/list/?match-postcode=1011AB&startswith-huisnummer=10&sortby=huisnummer&format=csv' diff --git a/testdata/dataselectie_vbo_energie_20210217.head.csv.gz b/testdata/dataselectie_vbo_energie_20210217.head.csv.gz deleted file mode 100644 index 2505c2b3f105ef8a08c52e6c090e91082a664387..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 717 zcmV;;0y6y{iwFpBActT817u-zVRL0{Wn*+{WnXq;Z(n6@WpZa}WnVHdGBGeRF*hz~ zWnpA4V{>)@<&{lu)G!c+@BE5YPE<0jzwI2`0~7%*2R@FGcjGQ@5<4W$t`OqCGf8(# z(jeN31GLRbyPnt1c=qwkIH_Hppkyq;(>(Itk$Sn**UE5|;nj-erHhAaP_G{-h zrnEkSWb($g*Hc;iR`&}_v$SQq3XRP+Av9&E-FhnL_QH7=oxK2kZ7U1z>~7P&>5Zwt zTy|Zvk7pR~^sT!rc5*-5sdW=J&ad3w*|oi&4{iv&Ha^eRrp>JJnK8aCI|v8gYBo6Z$& z9xQNAo+X$`Mli;N5=~Os@6=1U*KjI%s@P7Vcz86yT7pC{A;%{PB8nrjHx~p8lAtqZ z;{p!SdW7Ng>RtanIAIT1`kFbhEsnNk{a#g&khk^>}% zpfDmlk8v=u0VlNLDC%@C72LgVW#=P&=GFJ!B~KP zTZP@=0|}(@^a|sIGcv9))_Wh*5&uU9{RO0ZWY9o}-cE=6Vs{tN&B9|Kk2 diff --git a/testdata/dataselectie_vbo_energie_20210505.head.csv b/testdata/dataselectie_vbo_energie_20210505.head.csv new file mode 100644 index 0000000..0437c02 --- /dev/null +++ b/testdata/dataselectie_vbo_energie_20210505.head.csv @@ -0,0 +1,11 @@ +pid,vid,numid,straat,postcode,huisnummer,huisletter,huisnummertoevoeging,oppervlakte,woningequivalent,woning_type,labelscore_voorlopig,labelscore_definitief,energieklasse,gemeentecode,gemeentenaam,buurtcode,buurtnaam,wijkcode,wijknaam,provinciecode,provincienaam,point,pand_gas_ean_aansluitingen,group_id_2020,p6_gas_aansluitingen_2020,p6_gasm3_2020,p6_kwh_2020,p6_totaal_pandoppervlak_m2,pand_bouwjaar,pand_gas_aansluitingen,gebruiksdoelen 
+0363100012181960,0363010000785105,0363200000081085,De Ruijterkade,1011AB,105,,H,348,3,Niet wonen,0,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.9052328744478375 52.37780474588784),6,0,0,0,14940,6408,1884,6,{bijeenkomstfunctie} +0363100012181960,0363010000784610,0363200000081087,De Ruijterkade,1011AB,105,,2,72,1,Flat/appartement,7,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.9052328744478375 52.37780474588784),6,0,0,0,14940,6408,1884,6,{woonfunctie} +0363100012181960,0363010000964973,0363200000081086,De Ruijterkade,1011AB,105,,1,174,1,Niet wonen,0,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.9052328744478375 52.37780474588784),6,0,0,0,14940,6408,1884,6,{kantoorfunctie} +0363100012181960,0363010000977861,0363200000081088,De Ruijterkade,1011AB,105,,3,84,1,Flat/appartement,7,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.9052328744478375 52.37780474588784),6,0,0,0,14940,6408,1884,6,{woonfunctie} +0363100012181960,0363010000618793,0363200000081090,De Ruijterkade,1011AB,106,,2,82,1,Flat/appartement,7,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.9052328744478375 52.37780474588784),6,0,0,0,14940,6408,1884,6,{woonfunctie} +0363100012181960,0363010012064067,0363200012064203,De Ruijterkade,1011AB,106,,1,1,0,Niet wonen,0,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.9052328744478375 52.37780474588784),6,0,0,0,14940,6408,1884,6,{kantoorfunctie} +0363100012181960,0363010000618799,0363200000081091,De Ruijterkade,1011AB,106,,3,60,1,Flat/appartement,7,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.9052328744478375 52.37780474588784),6,0,0,0,14940,6408,1884,6,{woonfunctie} 
+0363100012181958,0363010000618800,0363200000414309,De Ruijterkade,1011AB,107,,H,280,2,Niet wonen,0,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.90541865504259 52.37775355302933),1,0,0,0,14940,6408,1005,1,{overige gebruiksfunctie} +0363100012181958,0363010000977860,0363200000081092,De Ruijterkade,1011AB,107,,,1774,14,Niet wonen,0,0,D,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.90541865504259 52.37775355302933),1,0,0,0,14940,6408,1005,1,{kantoorfunctie} +0363100012181958,0363010000618801,0363200000414310,De Ruijterkade,1011AB,107,,5,308,1,Flat/appartement,7,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.90541865504259 52.37775355302933),1,0,0,0,14940,6408,1005,1,{woonfunctie}