diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..8254c06
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,3 @@
+*.csv
+*.csv2
+.git
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..847e8af
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+extras/model.go
+testdata/*
+*.gz
+lambdadb
diff --git a/Dockerfile b/Dockerfile
index 8ee75ab..18b8934 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,7 +5,7 @@ RUN apk update && apk add --no-cache git
RUN apk --no-cache add ca-certificates
WORKDIR /app
-COPY . /app/
+COPY *.go /app/
# Fetch dependencies.
RUN go get -d -v
@@ -23,6 +23,7 @@ COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
#COPY --from=builder /app/files/ITEMS.txt.gz /app/files/ITEMS.txt.gz
WORKDIR /app
+
# Run the binary.
ENV http_db_host "0.0.0.0:8000"
diff --git a/README.md b/README.md
index 3b7be3a..d4e030c 100644
--- a/README.md
+++ b/README.md
@@ -1,28 +1,58 @@
# LambdaDB
In memory database that uses filters to get the data you need.
+LambdaDB has a tiny codebase that does a lot.
+LambdaDB is not meant as persistent storage or a replacement for a traditional
+database, but as a fast analytics and caching engine.
-Can be used for your needs by changing the models.go file to your needs.
+Powers: https://dego.vng.nl
+
+## Properties
+
+- Insanely fast API: ~1 ms responses.
+- Fast to set up.
+- Easy to deploy.
+- Easy to customize.
+- Easy to export data.
+
+- Implement custom authorized filters.
+
+## Indexes
+
+- S2 geoindex for fast point lookups
+- Bitarrays
+- Mapping
+
+- Your own special-purpose indexes!
+
+## Flow
+
+Generate a model and load your data.
+The API is generated from your model.
+Deploy.
+
+Condition: your dataset must fit in memory.
+
+Can be adapted to your needs by changing the generated `model.go` file.
Creating and registering of the functionality that is needed.
### Steps
You can start the database with only a csv. Go over steps below,
And see the result in your browser.
+
1. place csv file, in dir extras.
-2. `python3 create_model.py > ../model.go`
-3. cd ../
-4. go fmt
+2. `python3 create_model.py` and answer the questions.
+3. go fmt model.go
+4. mv model.go ../
5. go build
6. ./lambda --help
7. ./lambda --csv assets/items.csv or `python3 ingestion.py -b 1000`
9. curl 127.0.0.1:8128/help/
10. browser 127.0.0.1:8128/
- 11. instructions curl 127.0.0.1:8128/help/ | python -m json.tool
-
### Running
sudo docker-compose up --no-deps --build
@@ -30,3 +60,19 @@ sudo docker-compose up --no-deps --build
promql {instance="lambdadb:8000"}
python3 extras/ingestion.py -f movies_subset.tsv -format tsv -dbhost 127.0.0.1:8000
+
+1. 
instructions curl 127.0.0.1:8000/help/ | python -m json.tool + +### Questions + + + +### TODO + +- load data directly from a database (periodic) +- document the `create_model.py` questions +- use a remote data source +- use some more efficient storage method (done) +- generate swagger API +- Add more tests diff --git a/column.go b/column.go new file mode 100644 index 0000000..664e50d --- /dev/null +++ b/column.go @@ -0,0 +1,148 @@ +package main + +import ( + "errors" + "fmt" + "github.com/Workiva/go-datastructures/bitarray" + "log" + "strings" +) + +type fieldIdxMap map[string]uint32 +type IdxFieldMap map[uint32]string + +/* +MappedColumn store fields in Idx +*/ + +type MappedColumn struct { + Idx fieldIdxMap // stores field to int values + Field IdxFieldMap // stores int to field values to recover actual fields + IdxTracker uint32 + Name string +} + +type ColumnRegister map[string]MappedColumn + +var RegisteredColumns ColumnRegister + +func init() { + RegisteredColumns = make(ColumnRegister) +} + +func NewReapeatedColumn(column string) MappedColumn { + m := MappedColumn{ + make(fieldIdxMap), + make(IdxFieldMap), + 0, + column, + } + RegisteredColumns[column] = m + return m +} + +// Store field name as idx value and idx as field value +func (m *MappedColumn) Store(field string) { + + if _, ok := m.Idx[field]; !ok { + m.Idx[field] = m.IdxTracker + m.Field[m.IdxTracker] = field + m.IdxTracker++ + } +} + +// Store Array field (postgres Array). +func (m *MappedColumn) StoreArray(field string) []uint32 { + + fieldsArray := make([]uint32, 0) + + // parsing {a, b} array values + // string should be at least 2 example "{}" == size 2 + if len(field) > 2 { + fields, err := ParsePGArray(field) + + if err != nil { + log.Fatal(err, "error parsing array ") + } + + for _, gd := range fields { + m.Store(gd) + } + + for _, v := range fields { + fieldsArray = append(fieldsArray, Gebruiksdoelen.GetIndex(v)) + } + } + return fieldsArray +} + +func (m *MappedColumn) GetValue(idx uint32) string { + return m.Field[idx] +} + +func (m *MappedColumn) GetArrayValue(idxs []uint32) string { + + result := make([]string, 0) + for _, v := range idxs { + vs := m.GetValue(v) + result = append(result, vs) + } + return strings.Join(result, ", ") +} + +func (m *MappedColumn) GetIndex(s string) uint32 { + return m.Idx[s] +} + +// SetBitArray +func SetBitArray(column string, i uint32, label int) { + + var ba bitarray.BitArray + var ok bool + + // check if map of bitmaps is present for column + var map_ba fieldBitarrayMap + + if _, ok = BitArrays[column]; !ok { + map_ba := make(fieldBitarrayMap) + BitArrays[column] = map_ba + } + + map_ba = BitArrays[column] + + // check for existing bitarray for i value + ba, ok = map_ba[i] + + if !ok { + ba = bitarray.NewSparseBitArray() + map_ba[i] = ba + } + // set bit for item label. + ba.SetBit(uint64(label)) +} + +func GetBitArray(column, value string) (bitarray.BitArray, error) { + + var ok bool + + if _, ok = BitArrays[column]; !ok { + return nil, errors.New("no bitarray filter found for " + column) + } + + // convert string value to actual indexed int. 
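+	// e.g. column "wijkcode", value "WK036394" resolves to the uint32 key
+	// that MappedColumn.Store assigned when that value was first seen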
+ i, ok := RegisteredColumns[column].Idx[value] + + if !ok { + msg := fmt.Sprintf("no indexed int value found for %s %s", column, value) + return nil, errors.New(msg) + } + + ba, ok := BitArrays[column][i] + + if !ok { + msg := fmt.Sprintf("no bitarray found for %s %s %d", column, value, i) + return nil, errors.New(msg) + } + + return ba, nil +} diff --git a/csv.go b/csv.go index f748c4b..12674f0 100644 --- a/csv.go +++ b/csv.go @@ -1,18 +1,17 @@ package main import ( - "compress/gzip" "encoding/json" "errors" "fmt" + csv "github.com/JensRantil/go-csv" + "github.com/cheggaaa/pb" + "github.com/klauspost/pgzip" "io" "log" "os" "strings" "unicode/utf8" - - csv "github.com/JensRantil/go-csv" - "github.com/cheggaaa/pb" ) func containsDelimiter(col string) bool { @@ -23,14 +22,17 @@ func containsDelimiter(col string) bool { // Parse columns from first header row or from flags func parseColumns(reader *csv.Reader, skipHeader bool, fields string) ([]string, error) { + var err error var columns []string + if fields != "" { columns = strings.Split(fields, ",") if skipHeader { - reader.Read() //Force consume one row + reader.Read() // Force consume one row } + } else { columns, err = reader.Read() fmt.Printf("%v columns\n%v\n", len(columns), columns) @@ -38,6 +40,10 @@ func parseColumns(reader *csv.Reader, skipHeader bool, fields string) ([]string, fmt.Printf("FOUND ERR\n") return nil, err } + itemIn := ItemIn{} + if len(columns) != len(itemIn.Columns()) { + panic(errors.New("columns mismatch")) + } } for _, col := range columns { @@ -47,24 +53,22 @@ func parseColumns(reader *csv.Reader, skipHeader bool, fields string) ([]string, } } - //for i, col := range columns { - // columns[i] = postgresify(col) - //} - return columns, nil } func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, delimiter string, nullDelimiter string) (error, int, int) { + success := 0 failed := 0 - items := Items{} + items := ItemsIn{} for { - item := Item{} - columns := item.Columns() + itemIn := ItemIn{} + columns := itemIn.Columns() cols := make([]interface{}, len(columns)) + record, err := reader.Read() if err == io.EOF { @@ -73,6 +77,7 @@ func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, if err != nil { line := strings.Join(record, delimiter) + failed++ if ignoreErrors { @@ -97,8 +102,9 @@ func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, // marschall it to bytes b, _ := json.Marshal(itemMap) + // fill the new Item instance with values - if err := json.Unmarshal([]byte(b), &item); err != nil { + if err := json.Unmarshal([]byte(b), &itemIn); err != nil { line := strings.Join(record, delimiter) failed++ @@ -113,21 +119,24 @@ func copyCSVRows(itemChan ItemsChannel, reader *csv.Reader, ignoreErrors bool, if len(items) > 100000 { itemChan <- items - items = Items{} + items = ItemsIn{} } - items = append(items, &item) + + items = append(items, &itemIn) success++ } // add leftover items itemChan <- items + items = nil return nil, success, failed } func importCSV(filename string, itemChan ItemsChannel, ignoreErrors bool, skipHeader bool, - delimiter string, nullDelimiter string) error { + delimiter string, nullDelimiter string, +) error { dialect := csv.Dialect{} dialect.Delimiter, _ = utf8.DecodeRuneInString(delimiter) @@ -142,14 +151,20 @@ func importCSV(filename string, itemChan ItemsChannel, defer file.Close() bar = NewProgressBar(file) - fz, err := gzip.NewReader(io.TeeReader(file, bar)) - if err != nil { - return err + if 
strings.HasSuffix(filename, ".gz") { + fz, err := pgzip.NewReader(io.TeeReader(file, bar)) + + if err != nil { + return err + } + defer fz.Close() + reader = csv.NewDialectReader(fz, dialect) + } else { + fz := io.TeeReader(file, bar) + reader = csv.NewDialectReader(fz, dialect) } - defer fz.Close() - reader = csv.NewDialectReader(fz, dialect) } else { reader = csv.NewDialectReader(os.Stdin, dialect) } @@ -157,6 +172,7 @@ func importCSV(filename string, itemChan ItemsChannel, var err error _, err = parseColumns(reader, skipHeader, "") + if err != nil { log.Fatal(err) } @@ -179,7 +195,7 @@ func importCSV(filename string, itemChan ItemsChannel, return fmt.Errorf("line %d: %s", lineNumber, err) } - fmt.Printf("%d rows imported", success) + fmt.Printf("%d rows imported\n", success) if ignoreErrors && failed > 0 { fmt.Printf("%d rows could not be imported and have been written to stderr.", failed) diff --git a/custom.go b/custom.go new file mode 100644 index 0000000..ad14371 --- /dev/null +++ b/custom.go @@ -0,0 +1,58 @@ +package main + +import ( + "fmt" + "strconv" + "strings" +) + +type registerCustomGroupByFunc map[string]func(*Item, ItemsGroupedBy) + +var RegisterGroupByCustom registerCustomGroupByFunc + +func init() { + + RegisterGroupByCustom = make(registerCustomGroupByFunc) + RegisterGroupByCustom["gebruiksdoelen-mixed"] = GroupByGettersGebruiksdoelen + +} + +func reduceWEQ(items Items) map[string]string { + result := make(map[string]string) + weq := 0 + for i := range items { + _weq, err := strconv.ParseInt(Woningequivalent.GetValue(items[i].Woningequivalent), 10, 64) + if err != nil { + panic(err) + } + weq += int(_weq) + } + result["woningenquivalent"] = strconv.Itoa(weq) + return result +} + +func GettersToevoegingen(i *Item) string { + return Postcode.GetValue(i.Postcode) + " " + Huisnummer.GetValue(i.Huisnummer) +} + +// getter Gebruiksdoelen +func GroupByGettersGebruiksdoelen(item *Item, grouping ItemsGroupedBy) { + + for i := range item.Gebruiksdoelen { + groupkey := Gebruiksdoelen.GetValue(item.Gebruiksdoelen[i]) + grouping[groupkey] = append(grouping[groupkey], item) + } +} + +func GetAdres(i *Item) string { + adres := fmt.Sprintf("%s %s %s %s %s %s", + Straat.GetValue(i.Straat), + Huisnummer.GetValue(i.Huisnummer), + Huisletter.GetValue(i.Huisletter), + Huisnummertoevoeging.GetValue(i.Huisnummertoevoeging), + Postcode.GetValue(i.Postcode), + Gemeentenaam.GetValue(i.Gemeentenaam)) + + adres = strings.ReplaceAll(adres, " ", " ") + return adres +} diff --git a/docker-compose.yml b/docker-compose.yml index 991360a..2d3833b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,6 +4,7 @@ services: build: . ports: - 8000:8000 + prometheus: image: prom/prometheus:v2.21.0 ports: diff --git a/examples/curlgeotest.sh b/examples/curlgeotest.sh new file mode 100755 index 0000000..fb0d54f --- /dev/null +++ b/examples/curlgeotest.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +set -x +set -e +set -u + + +curl -vvv \ + --data-urlencode 'geojson={ + "type": "Polygon", + "coordinates": [ + [ + [4.902321, 52.428306], + [4.90127, 52.427024], + [4.905281, 52.426069], + [4.906782, 52.426226], + [4.906418, 52.427469], + [4.902321, 52.428306] + ] + ] + }' \ + 'http://127.0.0.1:8000/list/?groupby=postcode&reduce=count' diff --git a/examples/curltest.sh b/examples/curltest.sh new file mode 100755 index 0000000..29291ae --- /dev/null +++ b/examples/curltest.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -x +set -e +set -u + +# should be cached. 
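+# (the first call fills the group-by cache; repeat it and the response
+# should carry the used-cache: yes header)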
+curl -vv 'http://127.0.0.1:8000/list/?groupby=woning_type&reduce=count' + +# should not be cached.(using bitmaps) +curl -vv 'http://127.0.0.1:8000/list/?match-wijkcode=WK036394&groupby=woning_type&reduce=count' diff --git a/extras/config.yaml b/extras/config.yaml new file mode 100644 index 0000000..6c4b34a --- /dev/null +++ b/extras/config.yaml @@ -0,0 +1,35 @@ +model: + adres: u + buurtcode: b + buurtnaam: r + energieklasse: b + gebruiksdoelen: r + gemeentecode: b + gemeentenaam: r + group_id_2020: u + huisletter: r + huisnummer: r + huisnummertoevoeging: r + index: '0' + labelscore_definitief: b + labelscore_voorlopig: b + numid: u + oppervlakte: r + p6_gas_aansluitingen_2020: r + p6_gasm3_2020: r + p6_kwh_2020: r + p6_totaal_pandoppervlak_m2: r + pand_bouwjaar: r + pand_gas_aansluitingen: r + pand_gas_ean_aansluitingen: r + pid: b + point: g + postcode: b + provinciecode: b + provincienaam: r + straat: r + vid: r + wijkcode: b + wijknaam: r + woning_type: b + woningequivalent: r diff --git a/extras/create_model.py b/extras/create_model.py index 19147ae..6326805 100644 --- a/extras/create_model.py +++ b/extras/create_model.py @@ -1,277 +1,382 @@ -### First version is going to assume everything is a string -### also known as string theory:p +# -*- coding: utf-8 -*- +""" +Load first rows from csv, ask some questions +and generate a models.go to jumpstart +your lambda_db project for the given csv file + +models.go contains all the field information +and functions of rows in your data. + +- Repeated option to store repeated + values in a map and each individual items + only stores uint32 reference to map key. -### column with the name "value" or "index" will be used as index -### else the first column will be set as index, when index is enabled. -### this can be changed later in the generated model.go file +- BitArray option which is like Repeated + value but also creates a map[key]bitmap for all + items containing field value. Makes it possible + to do fast 'match' lookups. 
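+
+Answers are saved to, and on a re-run read back from, a config file
+(see extras/config.yaml). A minimal example using a few of the columns
+from that file:
+
+  model:
+    postcode: b      # bitarray column
+    straat: r        # repeated column
+    numid: u         # unique column
+    point: g         # geo lat/lon point
+    index: '0'       # position of the index column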
+ + +python create_model.py your.csv +""" import csv import sys -from filereader import create_reader, supported_fileformats +import os +from re import sub +from jinja2 import Environment, FileSystemLoader -def create_struct(item): - start = "type Item struct {\n" - # TODO add type - lines= [f'{k.capitalize()} string `json:"{k.lower()}"`' for k, v in item.items()] - stop = "\n}\n" - return start + "\n".join(lines) + stop +import yaml -def create_columns(item): - start = """ - func (i Item) Columns() []string { - return []string{ - """ - lines = [f'"{k.lower()}",' for k in item.keys()] - stop = """\n}\n}""" - return start + "\n".join(lines) + stop +if '-f' in sys.argv: + filename = str(sys.argv[sys.argv.index('-f')+1]) +else: + filename = "items.csv" +if '-c' in sys.argv: + config = str(sys.argv[sys.argv.index('-c')+1]) +else: + config = "config.yaml" -def create_row(item): - start = """ - func (i Item) Row() []string { - return []string{ - """ - lines = [f"i.{k.capitalize()}," for k in item.keys()] - stop = """\n}\n}""" - return start + "\n".join(lines) + stop - - -def get_index_column(item): - special_columns = ["value", "index"] - for column in special_columns: - if column in item: - return column - - # we tried, let's return the first column - n = iter(item.keys()) - return next(n) - - -def create_getindex(item): - index_column = get_index_column(item) - start = """ - func (i Item) GetIndex() string { - return """ - middle = f"i.{index_column.capitalize()}" - stop = """\n}""" - return start + middle + stop - - -def create_filter_contains(column): - return ( - f"func Filter{column.capitalize()}Contains(i *Item, s string) bool" + "{" + "\n" - f"return strings.Contains(i.{column.capitalize()}, s)" - "\n" + "}" - ) - -def create_filter_startswith(column): - return ( - f"func Filter{column.capitalize()}StartsWith(i *Item, s string) bool" + "{" + "\n" - f"return strings.HasPrefix(i.{column.capitalize()}, s)" - "\n" + "}" - ) - -def create_filter_match(column): - return ( - f"func Filter{column.capitalize()}Match(i *Item, s string) bool" + "{" + "\n" - f"return i.{column.capitalize()} == s" - "\n" + "}" - ) - - -def create_getter(column): - return ( - f"func Getters{column.capitalize()}(i *Item) string" + "{" + "\n" - f"return i.{column.capitalize()}" - "\n" + "}" - ) - - -def create_reduce(column): - return """ - func reduceCount(items Items) map[string]string { - result := make(map[string]string) - result["count"] = strconv.Itoa(len(items)) - return result -} - """ +with open(filename) as f: + reader = csv.DictReader(f) + row = dict(next(reader)) + +cfg = {} + +if os.path.isfile(config): + with open(config, 'r') as stream: + cfg = yaml.load(stream, Loader=yaml.FullLoader)['model'] -def create_init_register(): - return """ - RegisterFuncMap = make(registerFuncType) - RegisterGroupBy = make(registerGroupByFunc) - RegisterGetters = make(registerGettersMap) - RegisterReduce = make(registerReduce) +env = Environment( + loader=FileSystemLoader('./templates'), +) + +# keep track of all column names and all original names in csv +allcolumns = [] +allcolumns_org = [] +repeated = [] +repeated_org = [] +bitarray = [] +bitarray_org = [] +unique = [] +unique = [] +unique_org = [] +ignored = [] +ignored_org = [] +geocolumns = [] +geocolumns_org = [] + + +def gocamelCase(string): + """convert string to camelCase + + woning_type -> WoningType """ + string = sub(r"(_|-)+", " ", string).title().replace(" ", "") + return string + + +# ask some questions about columns. 
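+# answers are persisted to the config file (written near the end of this
+# script), so a re-run with an existing config is non-interactive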
+index = 0 +for k in row.keys(): + + # go camelcase column names + kc = gocamelCase(k) + + options = ['r', 'u', 'i', 'g', 'b'] + while True: + + action = None + + if cfg.get(k): + print(f"reading from config {k} {cfg[k]}") + action = cfg[k] + else: + # keep asking for valid input + q1 = ( + "(R)epeated value? has less then (2^16=65536) option.", + "(B)itarray, repeated column optimized for fast match.", + "(U)nique, (G)eo lat/lon point or (I)gnore ? r/b/u/g/i?." + ) + action = input(f"idx:{index} is {k} {q1}") # noqa + + if action == '': + print(f"pick one from {options}") + continue + if action not in options: + continue + break + + cfg[k] = action + + if action == 'r': + repeated.append(kc) + repeated_org.append(k) + elif action == 'u': + unique.append(kc) + unique_org.append(k) + elif action == 'i': + ignored.append(kc) + ignored_org.append(k) + elif action == 'g': + geocolumns.append(kc) + geocolumns_org.append(k) + unique.append(kc) + unique_org.append(k) + elif action == 'b': + # same as repeated but with some extra bitarray stuff + repeated.append(kc) + repeated_org.append(k) + bitarray.append(kc) + bitarray_org.append(k) + else: + print('invalid input') + sys.exit(-1) + + allcolumns.append(kc) + allcolumns_org.append(k) + index += 1 + +# ask for a index column +while True: + index = None + # keep asking for valid input + if cfg.get('index'): + index = cfg['index'] + else: + index = input(f"which column is idx? 0 - {len(allcolumns) - 1} ") + + cfg['index'] = index + + try: + index = int(index) -def create_register_match_func(column): - return f'RegisterFuncMap["match-{column.lower()}"] = Filter{column.capitalize()}Match' + if allcolumns[index] in ignored: + print('Selected an ignored column for index') + raise ValueError + if -1 < index < len(allcolumns): + break + + except ValueError: + continue + + print('try again..') -def create_register_contains_func(column): - return f'RegisterFuncMap["contains-{column.lower()}"] = Filter{column.capitalize()}Contains' +# save answers in config file +with open(config, 'w') as f: + dict_file = {'model': cfg} + yaml.dump(dict_file, f) + print(f'saved answers in config {config}') + + +# setup initial data structs for each repeated column +initRepeatColumns = [] +repeatColumnNames = [] +loadRepeatColumnNames = [] +mappedColumns = [] +registerColumns = [] + + +for columnName, c2 in zip(repeated, repeated_org): + initRow = f'\t {columnName} = NewReapeatedColumn("{c2}")\n' + initRepeatColumns.append(initRow) + + repeatRow = f"\t {columnName}, \n" + repeatColumnNames.append(repeatRow) + + loadRow = f"\t {columnName} = m.{columnName} \n" + loadRepeatColumnNames.append(loadRow) + + registerColumnsRow = f"\t RegisteredColumns[{columnName}.Name] = {columnName} \n" + registerColumns.append(registerColumnsRow) + + mappedColumnsRow = f"\t {columnName} MappedColumn \n" + mappedColumns.append(mappedColumnsRow) + + +# create bitarrays with item labels for column values. 
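+# e.g. a bitarray column "postcode" renders to: SetBitArray("postcode", i.Postcode, i.Label)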
+bitArrayStores = [] +for c1, c2 in zip(bitarray, bitarray_org): + onerow = f'\tSetBitArray("{c2}", i.{c1}, i.Label)\n' + bitArrayStores.append(onerow) + + +# create ItemFull struct fields +columnsItemIn = [] + +for c1, c2 in zip(allcolumns, allcolumns_org): + onerow = f'\t {c1} string `json:"{c2}"`\n' + columnsItemIn.append(onerow) + +# create ItemFull struct fields +columnsItemOut = [] +for c1, c2 in zip(allcolumns, allcolumns_org): + + if c1 in ignored: + continue + onerow = f'\t {c1} string `json:"{c2}"`\n' + columnsItemOut.append(onerow) -def create_register_startswith_func(column): - return f'RegisterFuncMap["startswith-{column.lower()}"] = Filter{column.capitalize()}StartsWith' +# create Item struct fields +columnsItem = [] +for c1, c2 in zip(allcolumns, allcolumns_org): + if c1 in ignored: + continue -def create_register_getter(column): - return f'RegisterGetters["{column.lower()}"] = Getters{column.capitalize()}' + onerow = f"\t{c1} string\n" + if c1 in repeated: + onerow = f"\t{c1} uint32\n" + columnsItem.append(onerow) -def create_register_groupby(column): - return f'RegisterGroupBy["{column.lower()}"] = Getters{column.capitalize()}' +# create Shrink code for repeated fields +# where we map uint32 to a string value. +shrinkVars = [] +shrinkItems = [] +for c in repeated: + mappedcolumn = f"var {c} MappedColumn\n" + shrinkVars.append(mappedcolumn) + shrinkItems.append(f"\t {c}.Store(i.{c})\n") -def create_register_reduce(column): - return 'RegisterReduce["count"] = reduceCount' +# create the actual shrinked/expand Item fields. +shrinkItemFields = [] +expandItemFields = [] -def create_grouped(): - return """ -type GroupedOperations struct { - Funcs registerFuncType - GroupBy registerGroupByFunc - Getters registerGettersMap - Reduce registerReduce -} +for c in allcolumns: -var Operations GroupedOperations + if c in ignored: + continue -var RegisterFuncMap registerFuncType -var RegisterGroupBy registerGroupByFunc -var RegisterGetters registerGettersMap -var RegisterReduce registerReduce -""" + if c in repeated: + # string to unint + shrinkItemFields.append(f"\t\t{c}.GetIndex(i.{c}),\n") + # unint back to string + expandItemFields.append(f"\t\t{c}.GetValue(i.{c}),\n") + else: + shrinkItemFields.append(f"\t\ti.{c},\n") + expandItemFields.append(f"\t\ti.{c},\n") + + +# ItemIn Columns +inColumns = [] +for c in allcolumns_org: + inColumns.append(f'\t\t"{c}",\n') + +# ItemOut Columns +outColumns = [] +for cc, c in zip(allcolumns, allcolumns_org): + # cc CamelCaseColumn. + if cc in ignored: + continue + outColumns.append(f'\t\t"{c}",\n') + +# create column filters. 
+# match, startswith, contains etc + +columnFilters = [] +filtertemplate = env.get_template("filters.jinja2") + +for c in allcolumns: + if c in ignored: + continue -def create_sortby_line_plus(column): - return f'"{column.lower()}"' + ": func(i, j int) bool { return " + f"items[i].{column.capitalize()} < items[j].{column.capitalize()} " + " }," - -def create_sortby_line_minus(column): - return f'"-{column.lower()}"' + ": func(i, j int) bool { return " + f"items[i].{column.capitalize()} > items[j].{column.capitalize()} " + " }," - -def create_sortby(row): - start = """func sortBy(items Items, sortingL []string) (Items, []string) { - sortFuncs := map[string]func(int, int) bool{""" - lines = [] - for k in row.keys(): - lines.append(create_sortby_line_plus(k)) - lines.append(create_sortby_line_minus(k)) - lines.append("\n") - lines.append("}") - end = """ - for _, sortFuncName := range sortingL { - sortFunc := sortFuncs[sortFuncName] - sort.Slice(items, sortFunc) - } - // TODO must be nicer way - keys := []string{} - for key := range sortFuncs { - keys = append(keys, key) - } - - return items, keys - }""" - return start + "\n".join(lines) + end - -if __name__ == "__main__": - - filename = str(sys.argv[sys.argv.index('-f')+1]) if '-f' in sys.argv else "items.csv" - file_format = str(sys.argv[sys.argv.index('-format')+1]) if '-format' in sys.argv else "csv" - - if file_format not in supported_fileformats(): - print(f"{file_format} not part of supported file formats {','.join(supported_fileformats())}") - sys.exit() - - with open(filename) as f: - reader = create_reader(f, file_format) - row = dict(next(reader)) - - print("package main") - print() - - print("import (") - print('"sort"') - print('"strconv"') - print('"strings"') - print(")") - print(create_struct(row)) - print() - print(create_columns(row)) - print() - print(create_row(row)) - print() - print(create_getindex(row)) - print() - - print("// contain filters") - for k in row.keys(): - print(create_filter_contains(k)) - - print() - print("// startswith filters") - for k in row.keys(): - print(create_filter_startswith(k)) - - print() - print("// match filters") - for k in row.keys(): - print(create_filter_match(k)) - - print() - print("// reduce functions") - print(create_reduce(None)) - - print() - print("// getters") - for k in row.keys(): - print(create_getter(k)) - print() - - - print(create_grouped()) - print("func init() {") - print(create_init_register()) - - print() - print("// register match filters") - for k in row.keys(): - print(create_register_match_func(k)) - - print() - print("// register contains filters") - for k in row.keys(): - print(create_register_contains_func(k)) - - print() - print("// register startswith filters") - for k in row.keys(): - print(create_register_startswith_func(k)) - print() - - print() - print("// register getters ") - for k in row.keys(): - print(create_register_getter(k)) - print() - - print() - print("// register groupby ") - for k in row.keys(): - print(create_register_groupby(k)) - print() - - - print() - print("// register reduce functions") - print(create_register_reduce(None)) - - print("}") - - print(create_sortby(row)) - print() + lookup = f"i.{c}" + if c in repeated: + lookup = f"{c}.GetValue(i.{c})" + + txt = filtertemplate.render(column=c, lookup=lookup) + columnFilters.append(txt) + +registerFilters = [] +rtempl = env.get_template('registerFilters.jinja2') +# register filters +for c, co in zip(allcolumns, allcolumns_org): + if c in ignored: + continue + txt = rtempl.render(co=co, 
columnName=c, bitarray=c in bitarray) + registerFilters.append(txt) + +sortColumns = [] +sortTemplate = env.get_template('sortfunc.jinja2') + +# create sort functions +for c, co in zip(allcolumns, allcolumns_org): + if c in ignored: + continue + + c1 = f"items[i].{c} < items[j].{c}" + c2 = f"items[i].{c} > items[j].{c}" + + if c in repeated: + c1 = f"{c}.GetValue(items[i].{c}) < {c}.GetValue(items[j].{c})" + c2 = f"{c}.GetValue(items[i].{c}) > {c}.GetValue(items[j].{c})" + + txt = sortTemplate.render(co=co, c1=c1, c2=c2) + sortColumns.append(txt) + + +csv_columns = [] +for c in allcolumns: + csv_columns.append(f'\t"{c}",\n') + + +# Finally render the model.go template +modeltemplate = env.get_template('model.template.jinja2') +mapstemplate = env.get_template('modelmap.template.jinja2') + +geometryGetter = '""' +print('GEOCOLUMNS: ' + " ".join(geocolumns)) +if len(geocolumns) == 1: + geometryGetter = f"Getters{geocolumns[0]}(&i)" + +output = modeltemplate.render( + columnsItemIn=''.join(columnsItemIn), + columnsItemOut=''.join(columnsItemOut), + columnsItem=''.join(columnsItem), + shrinkItems=''.join(shrinkItems), + shrinkItemFields=''.join(shrinkItemFields), + expandItemFields=''.join(expandItemFields), + csv_columns=''.join(csv_columns), + inColumns=''.join(inColumns), + outColumns=''.join(outColumns), + columnFilters=''.join(columnFilters), + registerFilters=''.join(registerFilters), + sortColumns=''.join(sortColumns), + indexcolumn=allcolumns[index], + geometryGetter=geometryGetter, + bitArrayStores=''.join(bitArrayStores), +) + +f = open('model.go', 'w') +f.write(output) +f.close() +print('saved in model.go') +print('!!NOTE!! edit the default search filter') + + +mapsoutput = mapstemplate.render( + initRepeatColumns=''.join(initRepeatColumns), + repeatColumnNames=''.join(repeatColumnNames), + loadRepeatColumnNames=''.join(loadRepeatColumnNames), + registerColumns=''.join(registerColumns), + mappedColumns=''.join(mappedColumns), + shrinkVars=''.join(shrinkVars), +) + +f = open('model_maps.go', 'w') +f.write(mapsoutput) +f.close() +print('model hashmaps saved in model_maps.go') + +os.system("go fmt model.go") +os.system("go fmt model_maps.go") diff --git a/extras/templates/filters.jinja2 b/extras/templates/filters.jinja2 new file mode 100644 index 0000000..a63578a --- /dev/null +++ b/extras/templates/filters.jinja2 @@ -0,0 +1,21 @@ + +// contain filter {{column}} +func Filter{{column}}Contains(i *Item, s string) bool { + return strings.Contains({{lookup}}, s) +} + + +// startswith filter {{column}} +func Filter{{column}}StartsWith(i *Item, s string) bool { + return strings.HasPrefix({{lookup}}, s) +} + +// match filters {{column}} +func Filter{{column}}Match(i *Item, s string) bool { + return {{lookup}} == s +} + +// getter {{column}} +func Getters{{column}}(i *Item) string { + return {{lookup}} +} diff --git a/extras/templates/initColumn.template.jinja2 b/extras/templates/initColumn.template.jinja2 new file mode 100644 index 0000000..edf5b81 --- /dev/null +++ b/extras/templates/initColumn.template.jinja2 @@ -0,0 +1,5 @@ + + {{columnName}}Tracker = 0 + {{columnName}}IdxMap = make(fieldIdxMap) + {{columnName}} = make(fieldMapIdx) + diff --git a/extras/templates/model.template.jinja2 b/extras/templates/model.template.jinja2 new file mode 100644 index 0000000..b34adb3 --- /dev/null +++ b/extras/templates/model.template.jinja2 @@ -0,0 +1,262 @@ +/* + model.go define the 'items' to store. + All columns with getters and setters are defined here. 
+ + ItemIn, represent rows from the Input data + Item, the compact item stored in memmory + ItemOut, defines how and which fields are exported out + of the API. It is possible to ignore input columns + + Repeated values are stored in maps with int numbers + as keys. Optionally bitarrays are created for reapeated + column values to do fast bit-wise filtering. + + A S2 geo index in created for lat, lon values. + + Unique values are stored as-is. + + The generated codes leaves room to create custom + index functions yourself to create an API with an + < 1 ms response time for your specific needs. + + This codebase solves: I need to have an API on this + tabular dataset fast! +*/ + +package main + +import ( + "encoding/json" + "sort" + "strconv" + "strings" + "errors" + + "github.com/Workiva/go-datastructures/bitarray" +) + +type registerGroupByFunc map[string]func(*Item) string +type registerGettersMap map[string]func(*Item) string +type registerReduce map[string]func(Items) map[string]string + +type registerBitArray map[string]func(s string) (bitarray.BitArray, error) +type fieldBitarrayMap map[uint32]bitarray.BitArray + +{{itemStructs}} + + +type ItemIn struct { + +{{columnsItemIn}} + +} + +type ItemOut struct { + +{{columnsItemOut}} + +} + + +type Item struct { + + Label int // internal index in ITEMS +{{columnsItem}} + +} + +func (i Item) MarshalJSON() ([]byte, error) { + return json.Marshal(i.Serialize()) +} + +// Shrink create smaller Item using uint32 +func (i ItemIn) Shrink(label int) Item { + +{{shrinkItems}} + + return Item{ + + label, + +{{shrinkItemFields}} + + } +} + +// Store selected columns in seperate map[columnvalue]bitarray +// for fast item selection +func (i *Item) StoreBitArrayColumns() { + {{ bitArrayStores }} +} + +func (i Item) Serialize() ItemOut { + return ItemOut{ + +{{expandItemFields}} + + } +} + +func (i ItemIn) Columns() []string { + return []string{ + +{{inColumns}} + + } +} + +func (i ItemOut) Columns() []string { + return []string{ + +{{outColumns}} + + } +} + + +func (i Item) Row() []string { + + return []string{ + +{{expandItemFields}} + + } +} + +func (i Item) GetIndex()string{ + return Getters{{indexcolumn}}(&i) +} + + +func (i Item) GetGeometry() string { + return {{geometryGetter}} +} + +{{columnFilters}} + +/* +// contain filters +func FilterEkeyContains(i *Item, s string) bool { + return strings.Contains(i.Ekey, s) +} + + +// startswith filters +func FilterEkeyStartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.Ekey, s) +} + + +// match filters +func FilterEkeyMatch(i *Item, s string) bool { + return i.Ekey == s +} + +// getters +func GettersEkey(i *Item) string { + return i.Ekey +} +*/ + +// reduce functions +func reduceCount(items Items) map[string]string { + result := make(map[string]string) + result["count"] = strconv.Itoa(len(items)) + return result +} + +type GroupedOperations struct { + Funcs registerFuncType + GroupBy registerGroupByFunc + Getters registerGettersMap + Reduce registerReduce + BitArrays registerBitArray +} + +var Operations GroupedOperations + +var RegisterFuncMap registerFuncType +var RegisterGroupBy registerGroupByFunc +var RegisterGetters registerGettersMap +var RegisterReduce registerReduce +var RegisterBitArray registerBitArray + +// ValidateRegsiters validate exposed columns do match filter names +func validateRegisters() error { + var i = ItemOut{} + var filters = []string{"match", "contains", "startswith"} + for _, c := range i.Columns() { + for _, f := range filters { + if _, ok := 
RegisterFuncMap[f+"-"+c]; !ok { + return errors.New(c + " is missing in RegisterMap") + } + } + } + return nil +} + +func init() { + + RegisterFuncMap = make(registerFuncType) + RegisterGroupBy = make(registerGroupByFunc) + RegisterGetters = make(registerGettersMap) + RegisterReduce = make(registerReduce) + + // register search filter. + //RegisterFuncMap["search"] = 'EDITYOURSELF' + // example RegisterFuncMap["search"] = FilterEkeyStartsWith + + //RegisterFuncMap["value"] = 'EDITYOURSELF' + // example RegisterGetters["value"] = GettersEkey + + // register filters + +{{registerFilters}} + + validateRegisters() + + /* + RegisterFuncMap["match-ekey"] = FilterEkeyMatch + RegisterFuncMap["contains-ekey"] = FilterEkeyContains + // register startswith filters + RegisterFuncMap["startswith-ekey"] = FilterEkeyStartsWith + // register getters + RegisterGetters["ekey"] = GettersEkey + // register groupby + RegisterGroupBy["ekey"] = GettersEkey + + */ + + // register reduce functions + RegisterReduce["count"] = reduceCount +} + +type sortLookup map[string]func(int, int) bool + +func createSort(items Items) sortLookup { + + sortFuncs := sortLookup{ + {{sortColumns}} + } + return sortFuncs +} + + +func sortBy(items Items, sortingL []string) (Items, []string) { + sortFuncs := createSort(items) + + for _, sortFuncName := range sortingL { + sortFunc, ok := sortFuncs[sortFuncName] + if ok { + sort.Slice(items, sortFunc) + } + } + + // TODO must be nicer way + keys := []string{} + for key := range sortFuncs { + keys = append(keys, key) + } + + return items, keys +} diff --git a/extras/templates/modelmap.template.jinja2 b/extras/templates/modelmap.template.jinja2 new file mode 100644 index 0000000..17b8b29 --- /dev/null +++ b/extras/templates/modelmap.template.jinja2 @@ -0,0 +1,49 @@ +/* + Transforming ItemsIn -> Items -> ItemsOut + Where Items has column values ar integers to save memmory + maps are needed to restore integers back to the actual values. + those are generated and stored here. 
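+
+    For example, a repeated column woning_type is generated as a WoningType
+    MappedColumn: Item only stores a uint32 key, and WoningType.GetValue(key)
+    restores the original string when serializing to ItemOut.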
+*/ + +package main + +type ModelMaps struct { + +{{mappedColumns}} + +} + +var BitArrays map[string]fieldBitarrayMap + +{{shrinkVars}} + + +func clearBitArrays() { + BitArrays = make(map[string]fieldBitarrayMap) +} + +func init() { + clearBitArrays() + setUpRepeatedColumns() +} + + +func setUpRepeatedColumns() { +{{initRepeatColumns}} +} + + +func CreateMapstore() ModelMaps { + return ModelMaps{ +{{repeatColumnNames}} + } +} + + +func LoadMapstore(m ModelMaps) { + +{{loadRepeatColumnNames}} + +{{registerColumns}} + +} diff --git a/extras/templates/registerFilters.jinja2 b/extras/templates/registerFilters.jinja2 new file mode 100644 index 0000000..52bba4b --- /dev/null +++ b/extras/templates/registerFilters.jinja2 @@ -0,0 +1,8 @@ + + //register filters for {{columnName}} + RegisterFuncMap["match-{{co}}"] = Filter{{columnName}}Match + RegisterFuncMap["contains-{{co}}"] = Filter{{columnName}}Contains + RegisterFuncMap["startswith-{{co}}"] = Filter{{columnName}}StartsWith + RegisterGetters["{{co}}"] = Getters{{columnName}} + RegisterGroupBy["{{co}}"] = Getters{{columnName}} + diff --git a/extras/templates/sortfunc.jinja2 b/extras/templates/sortfunc.jinja2 new file mode 100644 index 0000000..3773c7d --- /dev/null +++ b/extras/templates/sortfunc.jinja2 @@ -0,0 +1,4 @@ + + "{{co}}": func(i, j int) bool { return {{c1}} }, + "-{{co}}": func(i, j int) bool { return {{c2}} }, + diff --git a/geo.go b/geo.go new file mode 100644 index 0000000..74c9750 --- /dev/null +++ b/geo.go @@ -0,0 +1,193 @@ +/* + + Determine S2 cells involved in geometries. Provide a fast way to lookup + data from based on a geojson query. + + inspired by + "github.com/akhenakh/oureadb/index/geodata" + "github.com/akhenakh/oureadb/store" + + s2 cell index code. + + With S2 CillIDs we can find which items are contained in given + filter geometry (S2 cell union). + +*/ + +package main + +import ( + "fmt" + "github.com/go-spatial/geom" + "github.com/go-spatial/geom/encoding/wkt" + "github.com/golang/geo/s2" + "log" + "sort" + "strings" + // "sync" +) + +var minLevel int +var maxLevel int +var maxCells int + +// var s2Lock = sync.RWMutex{} + +type cellIndexNode struct { + ID s2.CellID + Label int +} + +type s2CellIndex []cellIndexNode +type s2CellMap map[int]s2.CellID + +// Implement Sort interface for s2CellIndex +func (c s2CellIndex) Len() int { return len(c) } +func (c s2CellIndex) Swap(i, j int) { c[i], c[j] = c[j], c[i] } +func (c s2CellIndex) Less(i, j int) bool { return c[i].ID < c[j].ID } + +var S2CELLS s2CellIndex +var S2CELLMAP s2CellMap + +func clearGeoIndex() { + S2CELLS = make(s2CellIndex, 0) + S2CELLMAP = s2CellMap{} +} + +func init() { + minLevel = 2 + maxLevel = 21 + maxCells = 450 + clearGeoIndex() +} + +func BuildGeoIndex() { + for i, v := range ITEMS { + err := v.GeoIndex(i) + if err != nil { + log.Println(err) + } + } + + defer S2CELLS.Sort() +} + +func (c cellIndexNode) IsEmpty() bool { + return c.ID == 0 +} + +// GeoIndex for each items determine S2Cell and store it. 
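+// It decodes the item's WKT point geometry, takes the S2 cell containing
+// the point, appends it to S2CELLS and records it in S2CELLMAP.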
+func (i Item) GeoIndex(label int) error { + + if i.GetGeometry() == "" { + return fmt.Errorf("missing wkt geometry") + } + sreader := strings.NewReader(i.GetGeometry()) + g, err := wkt.Decode(sreader) + + if err != nil { + fmt.Println(err.Error()) + fmt.Println(i.GetGeometry()) + return fmt.Errorf("wkt error encountered with %s", i.GetGeometry()) + } + + p, err := geom.GetCoordinates(g) + if err != nil { + fmt.Println(err.Error()) + fmt.Println(i.GetGeometry()) + fmt.Printf("geom error encountered with %s", i.GetGeometry()) + return fmt.Errorf("geom error") + } + + // s2Lock.Lock() + // defer s2Lock.Unlock() + + y := p[0][0] + x := p[0][1] + ll := s2.LatLngFromDegrees(x, y) + + if !ll.IsValid() { + fmt.Println(i.GetGeometry()) + fmt.Printf("ll geom error encountered with %f %f", x, y) + return fmt.Errorf("geom error") + } + + center := s2.PointFromLatLng(ll) + cell := s2.CellFromPoint(center) + + cnode := cellIndexNode{ID: cell.ID(), Label: i.Label} + S2CELLS = append(S2CELLS, cnode) + S2CELLMAP[i.Label] = cell.ID() + + // Update index while loading data so queries already work + //if label%1000000 == 0 { + // S2CELLS.Sort() + //} + + return nil + +} + +type MatchedItems map[int]bool + +// from map to array remove duplicate matches +func matchesToArray(items *Items, matched MatchedItems) Items { + newItems := make(Items, 0) + for k := range matched { + newItems = append(newItems, (*items)[k]) + } + + return newItems +} + +// Simple search algo +func SearchOverlapItems(items *Items, cu s2.CellUnion) Items { + + matchedItems := make(MatchedItems) + + for i := range *items { + l := (*items)[i].Label + if cu.ContainsCellID(S2CELLMAP[l]) { + matchedItems[l] = true + } + } + + return matchesToArray(items, matchedItems) +} + +// Given only a cell Union return Items +func SearchGeoItems(cu s2.CellUnion) Items { + + matchedItems := make(map[int]bool) + + cu.Normalize() + + min := S2CELLS.Seek(cu[0].ChildBegin()) + max := S2CELLS.Seek(cu[len(cu)-1].ChildEnd()) + + for _, i := range S2CELLS[min : max+1] { + if cu.ContainsCellID(i.ID) { + matchedItems[i.Label] = true + } + } + return matchesToArray(&ITEMS, matchedItems) +} + +// Seek position in index which is close to target +func (ci s2CellIndex) Seek(target s2.CellID) int { + + pos := sort.Search(len(ci), func(i int) bool { + return ci[i].ID > target + }) - 1 + + // Ensure we don't go beyond the beginning. + if pos < 0 { + pos = 0 + } + return pos +} + +// Sort CellIndex so Binary search can work. 
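+// Seek relies on sort.Search over ascending cell IDs, so call this after the index changes.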
+func (ci s2CellIndex) Sort() { + sort.Sort(ci) +} diff --git a/geom_s2cover.go b/geom_s2cover.go new file mode 100644 index 0000000..203d8fb --- /dev/null +++ b/geom_s2cover.go @@ -0,0 +1,161 @@ +/* + convert geojson to s2 cover + + derived from code found @ + github.com/akhenakh/oureadb +*/ + +package main + +import ( + "github.com/go-spatial/geom" + //"github.com/go-spatial/geom/encoding/geojson" + "fmt" + "github.com/golang/geo/s2" + "github.com/pkg/errors" +) + +//geoDataCoverCellUnion given geometry create an s2 cover for it +func geoDataCoverCellUnion(g geom.Geometry, coverer *s2.RegionCoverer, interior bool) (s2.CellUnion, error) { + + if geom.IsEmpty(g) { + fmt.Println("empty?") + return nil, errors.New("invalid geometry") + } + var cu s2.CellUnion + + switch gg := g.(type) { + case geom.Point: + points, _ := geom.GetCoordinates(gg) + c := s2.CellIDFromLatLng( + s2.LatLngFromDegrees(points[0][1], points[0][0]), + ) + cu = append(cu, c.Parent(coverer.MinLevel)) + + case geom.Polygon: + points, _ := geom.GetCoordinates(gg) + cup, err := coverPolygon(points, coverer, interior) + if err != nil { + return nil, errors.Wrap(err, "can't cover polygon") + } + cu = append(cu, cup...) + + case geom.MultiPolygon: + for _, p := range gg.Polygons() { + points, _ := geom.GetCoordinates(p) + cup, err := coverPolygon(points, coverer, interior) + if err != nil { + return nil, errors.Wrap(err, "can't cover multipolygon") + } + + cu = append(cu, cup...) + } + + case geom.LineString: + points, _ := geom.GetCoordinates(gg) + if len(points)%2 != 0 { + return nil, errors.New("invalid coordinates count for line") + } + + pl := make(s2.Polyline, len(points)) + for i := 0; i < len(points); i += 1 { + ll := s2.LatLngFromDegrees(points[i][1], points[i][0]) + pl[i] = s2.PointFromLatLng(ll) + } + + var cupl s2.CellUnion + if interior { + cupl = coverer.InteriorCellUnion(&pl) + } else { + cupl = coverer.CellUnion(&pl) + } + cu = append(cu, cupl...) 
+ + default: + fmt.Println(gg) + return nil, errors.New("unsupported geojson data type") + } + + return cu, nil +} + +func CoverDefault(g geom.Geometry) s2.CellUnion { + + coverer := &s2.RegionCoverer{MinLevel: minLevel, MaxLevel: maxLevel, MaxCells: maxCells} + cu, err := Cover(g, coverer) + + // no cover for this geo object this is probably an error + if len(cu) == 0 || err != nil { + fmt.Println("geo object can't be indexed, empty cover") + fmt.Println(err) + } + return cu +} + +// Cover generates an s2 cover for GeoData gd +func Cover(g geom.Geometry, coverer *s2.RegionCoverer) (s2.CellUnion, error) { + return geoDataCoverCellUnion(g, coverer, false) +} + +// returns an s2 cover from a list of lng, lat forming a closed polygon +func coverPolygon(p []geom.Point, coverer *s2.RegionCoverer, interior bool) (s2.CellUnion, error) { + if len(p) < 3 { + return nil, errors.New("invalid polygons not enough coordinates for a closed polygon") + } + if len(p)%2 != 0 { + if p[0] == p[len(p)-1] { + //last element == first element + p = p[1:] + } else { + return nil, errors.New("invalid polygons odd coordinates number") + } + } + + l := LoopFromCoordinatesAndCCW(p, true) + if l.IsEmpty() || l.IsFull() { + return nil, errors.New("invalid polygons") + } + + // super hacky try reverse if ContainsOrigin + if l.ContainsOrigin() { + // reversing the slice + for i := len(p)/2 - 1; i >= 0; i-- { + opp := len(p) - 1 - i + p[i], p[opp] = p[opp], p[i] + } + } + + if interior { + return coverer.InteriorCovering(l), nil + } + return coverer.Covering(l), nil +} + +// LoopFromCoordinatesAndCCW creates a LoopFence from a list of lng lat +// if checkCCW is true also try to fix CCW +func LoopFromCoordinatesAndCCW(p []geom.Point, checkCCW bool) *s2.Loop { + if len(p)%2 != 0 || len(p) < 3 { + return nil + } + points := make([]s2.Point, len(p)) + + for i := 0; i < len(p); i += 1 { + points[i] = s2.PointFromLatLng(s2.LatLngFromDegrees(p[i][1], p[i][0])) + } + + if checkCCW && s2.RobustSign(points[0], points[1], points[2]) != s2.CounterClockwise { + // reversing the slice + for i := len(points)/2 - 1; i >= 0; i-- { + opp := len(points) - 1 - i + points[i], points[opp] = points[opp], points[i] + } + } + + if points[0] == points[len(points)-1] { + // remove last item if same as 1st + points = append(points[:len(points)-1], points[len(points)-1+1:]...) 
+ } + + loop := s2.LoopFromPoints(points) + return loop +} diff --git a/http_handlers.go b/http_handlers.go index 26a1ede..3fbe35a 100644 --- a/http_handlers.go +++ b/http_handlers.go @@ -5,12 +5,14 @@ import ( "encoding/json" "fmt" "index/suffixarray" + // "io/ioutil" "log" "net/http" "runtime" "sort" "strconv" "strings" + "sync" "time" ) @@ -27,7 +29,6 @@ func setHeader(items Items, w http.ResponseWriter, query Query, queryTime int64) w.Header().Set("Content-Disposition", "attachment; filename=\"items.csv\"") w.Header().Set("Content-Type", "text/csv; charset=utf-8") } else { - w.Header().Set("Content-Type", "application/json") } @@ -38,11 +39,57 @@ func setHeader(items Items, w http.ResponseWriter, query Query, queryTime int64) } } +func hanleQueryError(err error, w http.ResponseWriter) { + response := make(map[string]string) + w.WriteHeader(500) + response["error"] = err.Error() + json.NewEncoder(w).Encode(response) +} + +type ReduceResult map[string]string +type GroupByResult map[string]ReduceResult + +var GroupByBodyCache = make(map[string]GroupByResult) +var GroupByHeaderCache = make(map[string]HeaderData) + +var cacheLock = sync.RWMutex{} + +// isCached try to find repsonse in cache (groupby only) +func isCached(w http.ResponseWriter, r *http.Request, query Query) bool { + cacheKey, err := query.CacheKey() + + if err == nil && len(query.GroupBy) > 0 && len(query.Reduce) > 0 { + cacheLock.RLock() + groupByResult, found := GroupByBodyCache[cacheKey] + headerCache, _ := GroupByHeaderCache[cacheKey] + cacheLock.RUnlock() + if found { + w.Header().Set("Content-Type", "application/json") + + for key, val := range headerCache { + w.Header().Set(key, val) + } + w.Header().Set("used-cache", "yes") + json.NewEncoder(w).Encode(groupByResult) + return found + } + } + return false +} + func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations GroupedOperations) func(http.ResponseWriter, *http.Request) { return func(w http.ResponseWriter, r *http.Request) { - query := parseURLParameters(r) + query, err := parseURLParameters(r) + if err != nil { + hanleQueryError(err, w) + return + } - items, queryTime := runQuery(ITEMS, query, operations) + if isCached(w, r, query) { + return + } + + items, queryTime := runQuery(&ITEMS, query, operations) msg := fmt.Sprint("total: ", len(ITEMS), " hits: ", len(items), " time: ", queryTime, "ms ", "url: ", r.URL) fmt.Printf(NoticeColorN, msg) @@ -53,9 +100,17 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group setHeader(items, w, query, queryTime) - groupByS, groupByFound := r.URL.Query()["groupby"] + // We want to count all filtered items. 
+ // and we do not have a groupby + if query.GroupBy == "" && query.Reduce != "" { + reduceFunc, _ := operations.Reduce[query.Reduce] + result := reduceFunc(items) + json.NewEncoder(w).Encode(result) + return + } - if !groupByFound { + // no groupby return all rows + if query.GroupBy == "" { if query.ReturnFormat == "csv" { writeCSV(items, w) } else { @@ -66,29 +121,32 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group return } - groupByItems := groupByRunner(items, groupByS[0]) + // groupby items on column + groupByItems := groupByRunner(items, query.GroupBy) items = nil - reduceName, reduceFound := r.URL.Query()["reduce"] - - if reduceFound { - result := make(map[string]map[string]string) - reduceFunc, reduceFuncFound := operations.Reduce[reduceName[0]] - if !reduceFuncFound { - json.NewEncoder(w).Encode(result) - return - } + if query.Reduce != "" { + result := make(GroupByResult) + reduceFunc, _ := operations.Reduce[query.Reduce] for key, val := range groupByItems { result[key] = reduceFunc(val) } groupByItems = nil if len(result) == 0 { - w.WriteHeader(404) return } + // Cache group-by reduce repsonse + cacheLock.Lock() + cacheKey, _ := query.CacheKey() + GroupByBodyCache[cacheKey] = result + headerData := getHeaderData(items, query, queryTime) + GroupByHeaderCache[cacheKey] = headerData + cacheLock.Unlock() + json.NewEncoder(w).Encode(result) + return } @@ -96,16 +154,10 @@ func contextListRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Group } } -func ItemChanWorker(itemChan ItemsChannel) { - for items := range itemChan { - ITEMS = append(ITEMS, items...) - } -} - func contextAddRest(JWTConig jwtConfig, itemChan ItemsChannel, operations GroupedOperations) func(http.ResponseWriter, *http.Request) { return func(w http.ResponseWriter, r *http.Request) { jsonDecoder := json.NewDecoder(r.Body) - var items Items + var items ItemsIn err := jsonDecoder.Decode(&items) if err != nil { fmt.Println(err) @@ -117,7 +169,7 @@ func contextAddRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Groupe strictMode := SETTINGS.Get("strict-mode") == "y" for n, item := range items { - if (*item == Item{}) { + if (*item == ItemIn{}) { fmt.Printf("unable to process item %d of batch\n", n) if strictMode { fmt.Printf("strict mode stopping ingestion of batch\n") @@ -136,6 +188,8 @@ func rmRest(w http.ResponseWriter, r *http.Request) { ITEMS = make(Items, 0, 100*1000) msg := fmt.Sprint("removed items from database") fmt.Printf(WarningColorN, msg) + ITEMS = Items{} + go func() { time.Sleep(1 * time.Second) runtime.GC() @@ -207,6 +261,11 @@ func makeIndex() { func writeCSV(items Items, w http.ResponseWriter) { writer := csv.NewWriter(w) + + columns := ItemOut{}.Columns() + writer.Write(columns) + writer.Flush() + for i := range items { writer.Write(items[i].Row()) writer.Flush() @@ -216,9 +275,14 @@ func writeCSV(items Items, w http.ResponseWriter) { func loadRest(w http.ResponseWriter, r *http.Request) { storagename, _, retrievefunc, filename := handleInputStorage(r) + start := time.Now() msg := fmt.Sprintf("retrieving with: %s, with filename: %s", storagename, filename) fmt.Printf(WarningColorN, msg) - itemsAdded, err := retrievefunc(ITEMS, filename) + itemsAdded, err := retrievefunc(filename) + diff := time.Since(start) + msg = fmt.Sprint("loading time: ", diff) + fmt.Printf(WarningColorN, msg) + if err != nil { log.Printf("could not open %s reason %s", filename, err) w.Write([]byte("500 - could not load data")) @@ -256,6 +320,7 @@ func handleInputStorage(r 
*http.Request) (string, storageFunc, retrieveFunc, str } filename := fmt.Sprintf("%s.%s", FILENAME, storagename) + return storagename, storagefunc, retrievefunc, filename } @@ -269,12 +334,11 @@ func saveRest(w http.ResponseWriter, r *http.Request) { msg = fmt.Sprintf("storage method: %s filename: %s\n", storagename, filename) fmt.Printf(WarningColor, msg) - size, err := storagefunc(ITEMS, filename) + size, err := storagefunc(filename) if err != nil { fmt.Println("unable to write file reason:", err) w.WriteHeader(500) return - } msg = fmt.Sprintf("filname %s, filesize: %d mb\n", filename, size/1024/1025) fmt.Printf(WarningColor, msg) @@ -291,37 +355,6 @@ func validColumn(column string, columns []string) bool { return false } -// Other wise also known in mathematics as set but in http name it would be confused with the verb set. -//func UniqueValuesInColumn(w http.ResponseWriter, r *http.Request) { -// column := r.URL.Path[1:] -// response := make(map[string]string) -// if len(ITEMS) == 0 { -// response["message"] = fmt.Sprint("invalid input: ", column) -// w.WriteHeader(400) -// json.NewEncoder(w).Encode(response) -// return -// -// } -// validColumns := ITEMS[0].Columns() -// -// if !validColumn(column, validColumns) { -// w.WriteHeader(400) -// -// response["message"] = fmt.Sprint("invalid input: ", column) -// response["input"] = column -// response["valid input"] = strings.Join(validColumns, ", ") -// json.NewEncoder(w).Encode(response) -// return -// } -// set := make(map[string]bool) -// for item := range ITEMS { -// r := reflect.ValueOf(item) -// value := reflect.Indirect(r).FieldByName(column) -// valu -// set[value.Str()] = true -// } -// -//} type ShowItem struct { IsShow bool `json:"isShow"` Label string `json:"label"` @@ -334,20 +367,36 @@ type Meta struct { } type searchResponse struct { - Count int `json:"count"` - Data Items `json:"data"` - MMeta *Meta `json:"meta"` + Count int `json:"count"` + Data ItemsOut `json:"data"` + MMeta *Meta `json:"meta"` +} + +func outputItems(items Items) ItemsOut { + + itemsout := make(ItemsOut, 0, len(items)) + + for _, oneitem := range items { + orgItem := oneitem.Serialize() + itemsout = append(itemsout, &orgItem) + } + + return itemsout } func makeResp(items Items) searchResponse { + + itemsout := outputItems(items) + fields := []ShowItem{} - for _, column := range items[0].Columns() { + columns := ItemOut{}.Columns() + for _, column := range columns { fields = append(fields, ShowItem{IsShow: true, Name: column, Label: column}) } return searchResponse{ Count: len(items), - Data: items, + Data: itemsout, MMeta: &Meta{Fields: fields, View: "table"}, } } @@ -364,6 +413,10 @@ func corsEnabled(h http.Handler) http.Handler { w.Header().Set("Access-Control-Allow-Headers", "Page, Page-Size, Total-Pages, query, Total-Items, Query-Duration, Content-Type, X-CSRF-Token, Authorization") return } else { + // make sure items are not being modified during request + // otherwise wait.. + lock.RLock() + defer lock.RUnlock() h.ServeHTTP(w, r) } }) @@ -371,12 +424,16 @@ func corsEnabled(h http.Handler) http.Handler { } func passThrough(h http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // make sure items are not being modified during request + // otherwise wait.. 
+ lock.RLock() + defer lock.RUnlock() h.ServeHTTP(w, r) - }) } func MIDDLEWARE(cors bool) func(http.Handler) http.Handler { + if cors { return corsEnabled } @@ -386,13 +443,20 @@ func MIDDLEWARE(cors bool) func(http.Handler) http.Handler { func contextSearchRest(JWTConig jwtConfig, itemChan ItemsChannel, operations GroupedOperations) func(http.ResponseWriter, *http.Request) { return func(w http.ResponseWriter, r *http.Request) { - query := parseURLParameters(r) + query, err := parseURLParameters(r) + + if err != nil { + hanleQueryError(err, w) + return + } + + items, queryTime := runQuery(&ITEMS, query, operations) - items, queryTime := runQuery(ITEMS, query, operations) if len(items) == 0 { w.WriteHeader(404) return } + msg := fmt.Sprint("total: ", len(ITEMS), " hits: ", len(items), " time: ", queryTime, "ms ", "url: ", r.URL) fmt.Printf(NoticeColorN, msg) headerData := getHeaderData(items, query, queryTime) @@ -402,6 +466,7 @@ func contextSearchRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Gro } w.Header().Set("Content-Type", "application/json") + for key, val := range headerData { w.Header().Set(key, val) } @@ -416,18 +481,26 @@ func contextSearchRest(JWTConig jwtConfig, itemChan ItemsChannel, operations Gro func contextTypeAheadRest(JWTConig jwtConfig, itemChan ItemsChannel, operations GroupedOperations) func(http.ResponseWriter, *http.Request) { return func(w http.ResponseWriter, r *http.Request) { - query := parseURLParameters(r) + query, err := parseURLParameters(r) + if err != nil { + hanleQueryError(err, w) + return + } + column := r.URL.Path[len("/typeahead/"):] if column[len(column)-1] == '/' { column = column[:len(column)-1] } - if _, ok := operations.Getters[column]; !ok { - w.WriteHeader(404) - w.Write([]byte("column is not found")) - return - } - results, queryTime := runTypeAheadQuery(ITEMS, column, query, operations) + /* + if _, ok := operations.Getters[column]; !ok { + w.WriteHeader(404) + w.Write([]byte("wrong column name")) + return + } + */ + + results, queryTime := runTypeAheadQuery(&ITEMS, column, query, operations) if len(results) == 0 { w.WriteHeader(404) return @@ -442,8 +515,8 @@ func contextTypeAheadRest(JWTConig jwtConfig, itemChan ItemsChannel, operations } w.WriteHeader(http.StatusOK) - json.NewEncoder(w).Encode(results) + results = nil } } @@ -477,7 +550,12 @@ func helpRest(w http.ResponseWriter, r *http.Request) { registerReduces = append(registerReduces, k) } - _, registeredSortings := sortBy(ITEMS, []string{}) + newItems := make(Items, 10) + for i := 0; i < 10; i++ { + newItems = append(newItems, ITEMS[i]) + } + + _, registeredSortings := sortBy(newItems, []string{}) sort.Strings(registeredFilters) sort.Strings(registeredExcludes) @@ -513,6 +591,5 @@ func helpRest(w http.ResponseWriter, r *http.Request) { fmt.Sprintf("typeahead use the name of the column in this case IP: http://%s/typeahead/ip/?starts-with=127&limit=15", host), } w.WriteHeader(http.StatusOK) - json.NewEncoder(w).Encode(response) } diff --git a/http_handlers_test.go b/http_handlers_test.go new file mode 100644 index 0000000..8a2ed67 --- /dev/null +++ b/http_handlers_test.go @@ -0,0 +1,184 @@ +/* + +# test some basic request handling. 
+ + - typeahead: http://%s/list/?typeahead=ams&limit=10", host), + - search: http://%s/list/?search=ams&page=1&pagesize=1", host), + - search with limit: http://%s/list/?search=10&page=1&pagesize=10&limit=5", host), + - sorting: http://%s/list/?search=100&page=10&pagesize=100&sortby=-country", host), + - filtering: http://%s/list/?search=10&ontains=144&contains-case=10&page=1&pagesize=1", host), + - groupby: http://%s/list/?search=10&contains-case=10&groupby=country", host), + - aggregation: http://%s/list/?search=10&contains-case=10&groupby=country&reduce=count", host), + - chain the same filters: http://%s/list/?search=10&contains-case=127&contains-case=0&contains-case=1", host), + - typeahead use the name of the column in this case IP: http://%s/typeahead/ip/?starts-with=127&limit=15", host), + + +*/ +package main + +import ( + "encoding/json" + "fmt" + // "io" + "net/http" + "net/http/httptest" + "net/url" + "strings" + "testing" +) + +var handler http.Handler + +/* load some data 19 records*/ +func TestMain(m *testing.M) { + + defaultSettings() + + SETTINGS.Set( + "csv", "./testdata/dataselectie_vbo_energie_20210505.head.csv", + "test dataset") + + SETTINGS.Set("channelwait", "0.01s", "timeout for channel loading") + + loadcsv(itemChan) + close(itemChan) + ItemChanWorker(itemChan) + + handler = setupHandler() + + // Run the test + m.Run() +} + +func TestCsvLoading(t *testing.T) { + + size := len(ITEMS) + + if size != 10 { + t.Errorf("expected 10 ITEMS got %d", size) + } +} + +func TestBasicHandlers(t *testing.T) { + + if len(ITEMS) < 10 { + t.Error("no items") + } + + type testCase struct { + url string + expected string + } + + tests := []testCase{ + testCase{"/list/?search=1", "10"}, + testCase{"/typeahead/huisnummer/?search=1", "3"}, + testCase{"/typeahead/pid/?search=1", "2"}, + testCase{"/help/", ""}, + } + + for i := range tests { + req := httptest.NewRequest("GET", tests[i].url, nil) + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + resp := w.Result() + if resp.StatusCode != 200 { + t.Errorf("request to %s failed", tests[i].url) + t.Error(resp) + } + + if tests[i].expected == "" { + continue + } + + if resp.Header.Get("Total-Items") != tests[i].expected { + t.Errorf("total hits mismatch from %s %s != %s", + tests[i].url, + tests[i].expected, + resp.Header.Get("Total-Items"), + ) + t.Error(resp) + } + } +} + +// Test geojson queries combined with groupby and reduce. 
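For context, a minimal standalone sketch (outside this patch) of the kind of request the TestGeoQuery test below makes: it assumes an instance is listening on the default http_db_host (shown here as 127.0.0.1:8000), uses only standard-library calls, and mirrors the polygon, groupby and reduce values of the test.

package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	// Same form fields as TestGeoQuery: group matching items by postcode and count them.
	form := url.Values{}
	form.Set("groupby", "postcode")
	form.Set("reduce", "count")
	form.Set("geojson", `{"type":"Polygon","coordinates":[[[4.905321,52.377706],[4.90527,52.377706],[4.90527,52.377869],[4.905321,52.377869],[4.905321,52.377706]]]}`)

	// PostForm sends application/x-www-form-urlencoded, matching the test's request.
	resp, err := http.PostForm("http://127.0.0.1:8000/list/", form)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Total-Items carries the hit count; the body holds the grouped/reduced JSON.
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Header.Get("Total-Items"), string(body))
}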
+func TestGeoQuery(t *testing.T) { + + BuildGeoIndex() + + if len(ITEMS) < 10 { + t.Error("no items") + } + + if len(S2CELLS) == 0 { + t.Error("geo indexing failed") + } + + if len(S2CELLMAP) == 0 { + t.Error("geo indexing failed") + } + + data := url.Values{} + data.Set("groupby", "postcode") + data.Set("reduce", "count") + + geojson := fmt.Sprint(` +{ + "type": "Polygon", + "coordinates": [ + [ + [4.905321, 52.377706], + [4.90527, 52.377706], + [4.90527, 52.377869], + [4.905321, 52.377869], + [4.905321, 52.377706] + ] + ] +} + `) + data.Set("geojson", geojson) + + params := strings.NewReader(data.Encode()) + + req := httptest.NewRequest("POST", "/list/", params) + req.Header.Add("Content-Type", "application/x-www-form-urlencoded") + + w := httptest.NewRecorder() + + handler.ServeHTTP(w, req) + + resp := w.Result() + if resp.StatusCode != 200 { + t.Errorf("geo request to %s failed statuscode", req.URL) + t.Error(resp) + } + + headerQuery := resp.Header.Get("Query") + query := Query{} + json.Unmarshal([]byte(headerQuery), &query) + + if query.GeometryGiven != true { + t.Errorf("geo request to %s failed ", req.URL) + t.Error(resp.Header.Get("Query")) + // t.Error(resp.Header.Get("GeometryGiven")) + t.Error(resp.Body) + } + + if resp.Header.Get("Total-Items") != "7" { + t.Error("geo request count is not 7") + } + + // parse json GroupBy response + defer resp.Body.Close() + j := GroupByResult{} + err := json.NewDecoder(resp.Body).Decode(&j) + + if err != nil { + t.Error(err) + } + + if j["1011AB"]["count"] != "7" { + t.Error("geo request json response count is not 7") + } +} diff --git a/main.go b/main.go index 9e2c88c..9b4dba3 100644 --- a/main.go +++ b/main.go @@ -2,40 +2,19 @@ package main import ( "fmt" - "log" - "net/http" // "runtime/debug" "github.com/pkg/profile") //"github.com/prometheus/client_golang/prometheus" //"github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/promhttp" + "log" + "net/http" // "runtime/debug" "github.com/pkg/profile") + "time" ) -type filterFuncc func(*Item, string) bool -type registerFuncType map[string]filterFuncc -type registerGroupByFunc map[string]func(*Item) string -type registerGettersMap map[string]func(*Item) string -type registerReduce map[string]func(Items) map[string]string -type filterType map[string][]string -type formatRespFunc func(w http.ResponseWriter, r *http.Request, items Items) -type registerFormatMap map[string]formatRespFunc - -//Items as Example -type Items []*Item - -type ItemsGroupedBy map[string]Items -type ItemsChannel chan Items - -var ITEMS Items - type jwtConfig struct { Enabled bool SharedSecret string } -type storageFunc func(Items, string) (int64, error) -type retrieveFunc func(Items, string) (int, error) -type storageFuncs map[string]storageFunc -type retrieveFuncs map[string]retrieveFunc - // Colors are fun, and can be used to note that this is joyfull and fun project. 
const ( InfoColor = "\033[1;34m%s\033[0m" @@ -52,27 +31,42 @@ const ( ) func init() { - + itemChan = make(ItemsChannel, 1000) } func loadcsv(itemChan ItemsChannel) { log.Print("loading given csv") + fmt.Println(SETTINGS.Get("delimiter")) err := importCSV(SETTINGS.Get("csv"), itemChan, - true, true, + false, true, SETTINGS.Get("delimiter"), SETTINGS.Get("null-delimiter")) + if err != nil { - log.Fatal(err) + log.Print(err) } - makeIndex() + + // make sure channels are empty + // add timeout there is no garantee ItemsChannel + // is empty and you miss a few records + timeout, _ := time.ParseDuration(SETTINGS.Get("channelwait")) + time.Sleep(timeout) + // S2CELLS.Sort() + fmt.Println("csv imported") + + // Empty cache. should be made more generic + cacheLock.Lock() + defer cacheLock.Unlock() + GroupByBodyCache = make(map[string]GroupByResult) + GroupByHeaderCache = make(map[string]HeaderData) } -func main() { - SETTINGS.Set("http_db_host", "0.0.0.0:8128", "host with port") +func defaultSettings() { + SETTINGS.Set("http_db_host", "0.0.0.0:8000", "host with port") SETTINGS.Set("SHAREDSECRET", "", "jwt shared secret") SETTINGS.Set("JWTENABLED", "y", "JWT enabled") - SETTINGS.Set("CORS", "n", "CORS enabled") + SETTINGS.Set("CORS", "y", "CORS enabled") SETTINGS.Set("csv", "", "load a gzipped csv file on starup") SETTINGS.Set("null-delimiter", "\\N", "null delimiter") @@ -85,16 +79,22 @@ func main() { SETTINGS.Set("strict-mode", "y", "strict mode does not allow ingestion of invalid items and will reject the batch") SETTINGS.Set("prometheus-monitoring", "n", "add promethues monitoring endpoint") - SETTINGS.Set("STORAGEMETHOD", "bytes", "Storagemethod available options are json, jsonz, bytes, bytesz") + SETTINGS.Set("STORAGEMETHOD", "bytesz", "Storagemethod available options are json, jsonz, bytes, bytesz") SETTINGS.Set("LOADATSTARTUP", "n", "Load data at startup. 
('y', 'n')") - SETTINGS.Parse() - //Construct yes or no to booleans in SETTINGS + SETTINGS.Set("readonly", "yes", "only allow read only funcions") + SETTINGS.Set("debug", "no", "print memory usage") + + SETTINGS.Set("groupbycache", "yes", "use in memory cache") + + SETTINGS.Set("channelwait", "5s", "timeout") - ITEMS = make(Items, 0, 100*1000) + SETTINGS.Parse() +} - Operations = GroupedOperations{Funcs: RegisterFuncMap, GroupBy: RegisterGroupBy, Getters: RegisterGetters, Reduce: RegisterReduce} - itemChan := make(ItemsChannel, 1000) +func main() { + + defaultSettings() go ItemChanWorker(itemChan) @@ -110,18 +110,38 @@ func main() { fmt.Println("start loading") go loadAtStart(SETTINGS.Get("STORAGEMETHOD"), FILENAME, SETTINGS.Get("indexed") == "y") } + + ipPort := SETTINGS.Get("http_db_host") + + mux := setupHandler() + + msg := fmt.Sprint( + "starting server\nhost: ", + ipPort, + ) + fmt.Printf(InfoColorN, msg) + log.Fatal(http.ListenAndServe(ipPort, mux)) +} + +func setupHandler() http.Handler { + JWTConfig := jwtConfig{ Enabled: SETTINGS.Get("JWTENABLED") == "yes", SharedSecret: SETTINGS.Get("SHAREDSECRET"), } - listRest := contextListRest(JWTConfig, itemChan, Operations) - addRest := contextAddRest(JWTConfig, itemChan, Operations) + Operations = GroupedOperations{ + Funcs: RegisterFuncMap, + GroupBy: RegisterGroupBy, + Getters: RegisterGetters, + Reduce: RegisterReduce, + BitArrays: RegisterBitArray, + } searchRest := contextSearchRest(JWTConfig, itemChan, Operations) typeAheadRest := contextTypeAheadRest(JWTConfig, itemChan, Operations) - - ipPort := SETTINGS.Get("http_db_host") + listRest := contextListRest(JWTConfig, itemChan, Operations) + addRest := contextAddRest(JWTConfig, itemChan, Operations) mux := http.NewServeMux() @@ -130,26 +150,33 @@ func main() { mux.HandleFunc("/list/", listRest) mux.HandleFunc("/help/", helpRest) + mux.Handle("/", http.FileServer(http.Dir("./files/www"))) + mux.Handle("/dsm-search", http.FileServer(http.Dir("./files/www"))) + if SETTINGS.Get("mgmt") == "y" { mux.HandleFunc("/mgmt/add/", addRest) mux.HandleFunc("/mgmt/rm/", rmRest) mux.HandleFunc("/mgmt/save/", saveRest) mux.HandleFunc("/mgmt/load/", loadRest) - - mux.Handle("/", http.FileServer(http.Dir("./files/www"))) - mux.Handle("/dsm-search", http.FileServer(http.Dir("./files/www"))) } if SETTINGS.Get("prometheus-monitoring") == "y" { mux.Handle("/metrics", promhttp.Handler()) } + fmt.Println("indexed: ", SETTINGS.Get("indexed")) cors := SETTINGS.Get("CORS") == "y" - msg := fmt.Sprint("starting server\nhost: ", ipPort, " with:", len(ITEMS), "items ", "management api's: ", SETTINGS.Get("mgmt") == "y", " jwt enabled: ", JWTConfig.Enabled, " monitoring: ", SETTINGS.Get("prometheus-monitoring") == "yes", " CORS: ", cors) + middleware := MIDDLEWARE(cors) + + msg := fmt.Sprint( + "setup http handler:", + " with:", len(ITEMS), "items ", + "management api's: ", SETTINGS.Get("mgmt") == "y", + " jwt enabled: ", JWTConfig.Enabled, " monitoring: ", SETTINGS.Get("prometheus-monitoring") == "yes", " CORS: ", cors) + fmt.Printf(InfoColorN, msg) - middleware := MIDDLEWARE(cors) - log.Fatal(http.ListenAndServe(ipPort, middleware(mux))) + return middleware(mux) } diff --git a/model.go b/model.go index 7d2b556..8c4adba 100644 --- a/model.go +++ b/model.go @@ -1,184 +1,1128 @@ +/* + model.go define the 'items' to store. + All columns with getters and setters are defined here. 
+ + ItemIn, represent rows from the Input data + Item, the compact item stored in memmory + ItemOut, defines how and which fields are exported out + of the API. It is possible to ignore input columns + + Repeated values are stored in maps with int numbers + as keys. Optionally bitarrays are created for reapeated + column values to do fast bit-wise filtering. + + A S2 geo index in created for lat, lon values. + + Unique values are stored as-is. + + The generated codes leaves room to create custom + index functions yourself to create an API with an + < 1 ms response time for your specific needs. + + This codebase solves: I need to have an API on this + tabular dataset fast! +*/ + package main import ( + "encoding/json" + "errors" "sort" "strconv" "strings" + + "github.com/Workiva/go-datastructures/bitarray" ) +type registerGroupByFunc map[string]func(*Item) string +type registerGettersMap map[string]func(*Item) string +type registerReduce map[string]func(Items) map[string]string + +type registerBitArray map[string]func(s string) (bitarray.BitArray, error) +type fieldBitarrayMap map[uint32]bitarray.BitArray + +type ItemIn struct { + Pid string `json:"pid"` + Vid string `json:"vid"` + Numid string `json:"numid"` + Straat string `json:"straat"` + Postcode string `json:"postcode"` + Huisnummer string `json:"huisnummer"` + Huisletter string `json:"huisletter"` + Huisnummertoevoeging string `json:"huisnummertoevoeging"` + Oppervlakte string `json:"oppervlakte"` + Woningequivalent string `json:"woningequivalent"` + WoningType string `json:"woning_type"` + LabelscoreVoorlopig string `json:"labelscore_voorlopig"` + LabelscoreDefinitief string `json:"labelscore_definitief"` + Energieklasse string `json:"energieklasse"` + Gemeentecode string `json:"gemeentecode"` + Gemeentenaam string `json:"gemeentenaam"` + Buurtcode string `json:"buurtcode"` + Buurtnaam string `json:"buurtnaam"` + Wijkcode string `json:"wijkcode"` + Wijknaam string `json:"wijknaam"` + Provinciecode string `json:"provinciecode"` + Provincienaam string `json:"provincienaam"` + Point string `json:"point"` + PandGasEanAansluitingen string `json:"pand_gas_ean_aansluitingen"` + GroupId2020 string `json:"group_id_2020"` + P6GasAansluitingen2020 string `json:"p6_gas_aansluitingen_2020"` + P6Gasm32020 string `json:"p6_gasm3_2020"` + P6Kwh2020 string `json:"p6_kwh_2020"` + P6TotaalPandoppervlakM2 string `json:"p6_totaal_pandoppervlak_m2"` + PandBouwjaar string `json:"pand_bouwjaar"` + PandGasAansluitingen string `json:"pand_gas_aansluitingen"` + Gebruiksdoelen string `json:"gebruiksdoelen"` +} + +type ItemOut struct { + Pid string `json:"pid"` + Vid string `json:"vid"` + Numid string `json:"numid"` + Straat string `json:"straat"` + Postcode string `json:"postcode"` + Huisnummer string `json:"huisnummer"` + Huisletter string `json:"huisletter"` + Huisnummertoevoeging string `json:"huisnummertoevoeging"` + Adres string `json:"adres"` // should be removed soon + Oppervlakte string `json:"oppervlakte"` + Woningequivalent string `json:"woningequivalent"` + WoningType string `json:"woning_type"` + LabelscoreVoorlopig string `json:"labelscore_voorlopig"` + LabelscoreDefinitief string `json:"labelscore_definitief"` + Energieklasse string `json:"energieklasse"` + Gemeentecode string `json:"gemeentecode"` + Gemeentenaam string `json:"gemeentenaam"` + Buurtcode string `json:"buurtcode"` + Buurtnaam string `json:"buurtnaam"` + Wijkcode string `json:"wijkcode"` + Wijknaam string `json:"wijknaam"` + Provinciecode string `json:"provinciecode"` + 
Provincienaam string `json:"provincienaam"` + Point string `json:"point"` + PandGasEanAansluitingen string `json:"pand_gas_ean_aansluitingen"` + GroupId2020 string `json:"group_id_2020"` + P6GasAansluitingen2020 string `json:"p6_gas_aansluitingen_2020"` + P6Gasm32020 string `json:"p6_gasm3_2020"` + P6Kwh2020 string `json:"p6_kwh_2020"` + P6TotaalPandoppervlakM2 string `json:"p6_totaal_pandoppervlak_m2"` + PandBouwjaar string `json:"pand_bouwjaar"` + PandGasAansluitingen string `json:"pand_gas_aansluitingen"` + Gebruiksdoelen string `json:"gebruiksdoelen"` +} + type Item struct { - Tconst string `json:"tconst"` - Titletype string `json:"titletype"` - Primarytitle string `json:"primarytitle"` - Originaltitle string `json:"originaltitle"` - Isadult string `json:"isadult"` - Startyear string `json:"startyear"` - Endyear string `json:"endyear"` - Runtimeminutes string `json:"runtimeminutes"` - Genres string `json:"genres"` -} - -func (i Item) Columns() []string { + Label int // internal index in ITEMS + Pid uint32 + Vid uint32 + Numid string + Straat uint32 + Postcode uint32 + Huisnummer uint32 + Huisletter uint32 + Huisnummertoevoeging uint32 + Oppervlakte uint32 + Woningequivalent uint32 + WoningType uint32 + LabelscoreVoorlopig uint32 + LabelscoreDefinitief uint32 + Energieklasse uint32 + Gemeentecode uint32 + Gemeentenaam uint32 + Buurtcode uint32 + Buurtnaam uint32 + Wijkcode uint32 + Wijknaam uint32 + Provinciecode uint32 + Provincienaam uint32 + Point string + PandGasEanAansluitingen uint32 + GroupId2020 string + P6GasAansluitingen2020 uint32 + P6Gasm32020 uint32 + P6Kwh2020 uint32 + P6TotaalPandoppervlakM2 uint32 + PandBouwjaar uint32 + PandGasAansluitingen uint32 + Gebruiksdoelen []uint32 +} + +func (i Item) MarshalJSON() ([]byte, error) { + return json.Marshal(i.Serialize()) +} + +// Shrink create smaller Item using uint32 +func (i ItemIn) Shrink(label int) Item { + + Pid.Store(i.Pid) + Vid.Store(i.Vid) + Straat.Store(i.Straat) + Postcode.Store(i.Postcode) + Huisnummer.Store(i.Huisnummer) + Huisletter.Store(i.Huisletter) + Huisnummertoevoeging.Store(i.Huisnummertoevoeging) + Oppervlakte.Store(i.Oppervlakte) + Woningequivalent.Store(i.Woningequivalent) + WoningType.Store(i.WoningType) + LabelscoreVoorlopig.Store(i.LabelscoreVoorlopig) + LabelscoreDefinitief.Store(i.LabelscoreDefinitief) + Energieklasse.Store(i.Energieklasse) + Gemeentecode.Store(i.Gemeentecode) + Gemeentenaam.Store(i.Gemeentenaam) + Buurtcode.Store(i.Buurtcode) + Buurtnaam.Store(i.Buurtnaam) + Wijkcode.Store(i.Wijkcode) + Wijknaam.Store(i.Wijknaam) + Provinciecode.Store(i.Provinciecode) + Provincienaam.Store(i.Provincienaam) + PandGasEanAansluitingen.Store(i.PandGasEanAansluitingen) + P6GasAansluitingen2020.Store(i.P6GasAansluitingen2020) + P6Gasm32020.Store(i.P6Gasm32020) + P6Kwh2020.Store(i.P6Kwh2020) + P6TotaalPandoppervlakM2.Store(i.P6TotaalPandoppervlakM2) + PandBouwjaar.Store(i.PandBouwjaar) + PandGasAansluitingen.Store(i.PandGasAansluitingen) + + doelen := Gebruiksdoelen.StoreArray(i.Gebruiksdoelen) + + return Item{ + + label, + + Pid.GetIndex(i.Pid), + Vid.GetIndex(i.Vid), + i.Numid, + Straat.GetIndex(i.Straat), + Postcode.GetIndex(i.Postcode), + Huisnummer.GetIndex(i.Huisnummer), + Huisletter.GetIndex(i.Huisletter), + Huisnummertoevoeging.GetIndex(i.Huisnummertoevoeging), + Oppervlakte.GetIndex(i.Oppervlakte), + Woningequivalent.GetIndex(i.Woningequivalent), + WoningType.GetIndex(i.WoningType), + LabelscoreVoorlopig.GetIndex(i.LabelscoreVoorlopig), + LabelscoreDefinitief.GetIndex(i.LabelscoreDefinitief), + 
Energieklasse.GetIndex(i.Energieklasse), + Gemeentecode.GetIndex(i.Gemeentecode), + Gemeentenaam.GetIndex(i.Gemeentenaam), + Buurtcode.GetIndex(i.Buurtcode), + Buurtnaam.GetIndex(i.Buurtnaam), + Wijkcode.GetIndex(i.Wijkcode), + Wijknaam.GetIndex(i.Wijknaam), + Provinciecode.GetIndex(i.Provinciecode), + Provincienaam.GetIndex(i.Provincienaam), + i.Point, + PandGasEanAansluitingen.GetIndex(i.PandGasEanAansluitingen), + i.GroupId2020, + P6GasAansluitingen2020.GetIndex(i.P6GasAansluitingen2020), + P6Gasm32020.GetIndex(i.P6Gasm32020), + P6Kwh2020.GetIndex(i.P6Kwh2020), + P6TotaalPandoppervlakM2.GetIndex(i.P6TotaalPandoppervlakM2), + PandBouwjaar.GetIndex(i.PandBouwjaar), + PandGasAansluitingen.GetIndex(i.PandGasAansluitingen), + doelen, + } +} + +// Store selected columns in seperate map[columnvalue]bitarray +// for fast item selection +func (i *Item) StoreBitArrayColumns() { + + SetBitArray("pid", i.Pid, i.Label) + SetBitArray("postcode", i.Postcode, i.Label) + SetBitArray("woning_type", i.WoningType, i.Label) + SetBitArray("labelscore_voorlopig", i.LabelscoreVoorlopig, i.Label) + SetBitArray("labelscore_definitief", i.LabelscoreDefinitief, i.Label) + SetBitArray("energieklasse", i.Energieklasse, i.Label) + SetBitArray("gemeentecode", i.Gemeentecode, i.Label) + SetBitArray("buurtcode", i.Buurtcode, i.Label) + SetBitArray("wijkcode", i.Wijkcode, i.Label) + SetBitArray("provinciecode", i.Provinciecode, i.Label) + +} + +func (i Item) Serialize() ItemOut { + return ItemOut{ + + Pid.GetValue(i.Pid), + Vid.GetValue(i.Vid), + i.Numid, + Straat.GetValue(i.Straat), + Postcode.GetValue(i.Postcode), + Huisnummer.GetValue(i.Huisnummer), + Huisletter.GetValue(i.Huisletter), + Huisnummertoevoeging.GetValue(i.Huisnummertoevoeging), + GetAdres(&i), + Oppervlakte.GetValue(i.Oppervlakte), + Woningequivalent.GetValue(i.Woningequivalent), + WoningType.GetValue(i.WoningType), + LabelscoreVoorlopig.GetValue(i.LabelscoreVoorlopig), + LabelscoreDefinitief.GetValue(i.LabelscoreDefinitief), + Energieklasse.GetValue(i.Energieklasse), + Gemeentecode.GetValue(i.Gemeentecode), + Gemeentenaam.GetValue(i.Gemeentenaam), + Buurtcode.GetValue(i.Buurtcode), + Buurtnaam.GetValue(i.Buurtnaam), + Wijkcode.GetValue(i.Wijkcode), + Wijknaam.GetValue(i.Wijknaam), + Provinciecode.GetValue(i.Provinciecode), + Provincienaam.GetValue(i.Provincienaam), + i.Point, + PandGasEanAansluitingen.GetValue(i.PandGasEanAansluitingen), + i.GroupId2020, + P6GasAansluitingen2020.GetValue(i.P6GasAansluitingen2020), + P6Gasm32020.GetValue(i.P6Gasm32020), + P6Kwh2020.GetValue(i.P6Kwh2020), + P6TotaalPandoppervlakM2.GetValue(i.P6TotaalPandoppervlakM2), + PandBouwjaar.GetValue(i.PandBouwjaar), + PandGasAansluitingen.GetValue(i.PandGasAansluitingen), + Gebruiksdoelen.GetArrayValue(i.Gebruiksdoelen), + } +} + +func (i ItemIn) Columns() []string { return []string{ - "tconst", - "titletype", - "primarytitle", - "originaltitle", - "isadult", - "startyear", - "endyear", - "runtimeminutes", - "genres", + + "pid", + "vid", + "numid", + "straat", + "postcode", + "huisnummer", + "huisletter", + "huisnummertoevoeging", + "oppervlakte", + "woningequivalent", + "woning_type", + "labelscore_voorlopig", + "labelscore_definitief", + "energieklasse", + "gemeentecode", + "gemeentenaam", + "buurtcode", + "buurtnaam", + "wijkcode", + "wijknaam", + "provinciecode", + "provincienaam", + "point", + "pand_gas_ean_aansluitingen", + "group_id_2020", + "p6_gas_aansluitingen_2020", + "p6_gasm3_2020", + "p6_kwh_2020", + "p6_totaal_pandoppervlak_m2", + "pand_bouwjaar", + 
"pand_gas_aansluitingen", + "gebruiksdoelen", + } +} + +func (i ItemOut) Columns() []string { + return []string{ + + "pid", + "vid", + "numid", + "straat", + "postcode", + "huisnummer", + "huisletter", + "huisnummertoevoeging", + "oppervlakte", + "woningequivalent", + "woning_type", + "labelscore_voorlopig", + "labelscore_definitief", + "energieklasse", + "gemeentecode", + "gemeentenaam", + "buurtcode", + "buurtnaam", + "wijkcode", + "wijknaam", + "provinciecode", + "provincienaam", + "point", + "pand_gas_ean_aansluitingen", + "group_id_2020", + "p6_gas_aansluitingen_2020", + "p6_gasm3_2020", + "p6_kwh_2020", + "p6_totaal_pandoppervlak_m2", + "pand_bouwjaar", + "pand_gas_aansluitingen", + "gebruiksdoelen", } } func (i Item) Row() []string { + return []string{ - i.Tconst, - i.Titletype, - i.Primarytitle, - i.Originaltitle, - i.Isadult, - i.Startyear, - i.Endyear, - i.Runtimeminutes, - i.Genres, + + Pid.GetValue(i.Pid), + Vid.GetValue(i.Vid), + i.Numid, + Straat.GetValue(i.Straat), + Postcode.GetValue(i.Postcode), + Huisnummer.GetValue(i.Huisnummer), + Huisletter.GetValue(i.Huisletter), + Huisnummertoevoeging.GetValue(i.Huisnummertoevoeging), + Oppervlakte.GetValue(i.Oppervlakte), + Woningequivalent.GetValue(i.Woningequivalent), + WoningType.GetValue(i.WoningType), + LabelscoreVoorlopig.GetValue(i.LabelscoreVoorlopig), + LabelscoreDefinitief.GetValue(i.LabelscoreDefinitief), + Energieklasse.GetValue(i.Energieklasse), + Gemeentecode.GetValue(i.Gemeentecode), + Gemeentenaam.GetValue(i.Gemeentenaam), + Buurtcode.GetValue(i.Buurtcode), + Buurtnaam.GetValue(i.Buurtnaam), + Wijkcode.GetValue(i.Wijkcode), + Wijknaam.GetValue(i.Wijknaam), + Provinciecode.GetValue(i.Provinciecode), + Provincienaam.GetValue(i.Provincienaam), + i.Point, + PandGasEanAansluitingen.GetValue(i.PandGasEanAansluitingen), + i.GroupId2020, + P6GasAansluitingen2020.GetValue(i.P6GasAansluitingen2020), + P6Gasm32020.GetValue(i.P6Gasm32020), + P6Kwh2020.GetValue(i.P6Kwh2020), + P6TotaalPandoppervlakM2.GetValue(i.P6TotaalPandoppervlakM2), + PandBouwjaar.GetValue(i.PandBouwjaar), + PandGasAansluitingen.GetValue(i.PandGasAansluitingen), + Gebruiksdoelen.GetArrayValue(i.Gebruiksdoelen), } } func (i Item) GetIndex() string { - return i.Tconst + return GettersPid(&i) } -// contain filters -func FilterTconstContains(i *Item, s string) bool { - return strings.Contains(i.Tconst, s) +func (i Item) GetGeometry() string { + return GettersPoint(&i) } -func FilterTitletypeContains(i *Item, s string) bool { - return strings.Contains(i.Titletype, s) + +// contain filter Pid +func FilterPidContains(i *Item, s string) bool { + return strings.Contains(Pid.GetValue(i.Pid), s) } -func FilterPrimarytitleContains(i *Item, s string) bool { - return strings.Contains(i.Primarytitle, s) + +// startswith filter Pid +func FilterPidStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Pid.GetValue(i.Pid), s) } -func FilterOriginaltitleContains(i *Item, s string) bool { - return strings.Contains(i.Originaltitle, s) + +// match filters Pid +func FilterPidMatch(i *Item, s string) bool { + return Pid.GetValue(i.Pid) == s } -func FilterIsadultContains(i *Item, s string) bool { - return strings.Contains(i.Isadult, s) + +// getter Pid +func GettersPid(i *Item) string { + return Pid.GetValue(i.Pid) } -func FilterStartyearContains(i *Item, s string) bool { - return strings.Contains(i.Startyear, s) + +// contain filter Vid +func FilterVidContains(i *Item, s string) bool { + return strings.Contains(Vid.GetValue(i.Vid), s) } -func FilterEndyearContains(i *Item, 
s string) bool { - return strings.Contains(i.Endyear, s) + +// startswith filter Vid +func FilterVidStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Vid.GetValue(i.Vid), s) } -func FilterRuntimeminutesContains(i *Item, s string) bool { - return strings.Contains(i.Runtimeminutes, s) + +// match filters Vid +func FilterVidMatch(i *Item, s string) bool { + return Vid.GetValue(i.Vid) == s } -func FilterGenresContains(i *Item, s string) bool { - return strings.Contains(i.Genres, s) + +// getter Vid +func GettersVid(i *Item) string { + return Vid.GetValue(i.Vid) } -// startswith filters -func FilterTconstStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Tconst, s) +// contain filter Numid +func FilterNumidContains(i *Item, s string) bool { + return strings.Contains(i.Numid, s) } -func FilterTitletypeStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Titletype, s) + +// startswith filter Numid +func FilterNumidStartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.Numid, s) } -func FilterPrimarytitleStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Primarytitle, s) + +// match filters Numid +func FilterNumidMatch(i *Item, s string) bool { + return i.Numid == s } -func FilterOriginaltitleStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Originaltitle, s) + +// getter Numid +func GettersNumid(i *Item) string { + return i.Numid } -func FilterIsadultStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Isadult, s) + +// contain filter Straat +func FilterStraatContains(i *Item, s string) bool { + return strings.Contains(Straat.GetValue(i.Straat), s) } -func FilterStartyearStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Startyear, s) + +// startswith filter Straat +func FilterStraatStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Straat.GetValue(i.Straat), s) } -func FilterEndyearStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Endyear, s) + +// match filters Straat +func FilterStraatMatch(i *Item, s string) bool { + return Straat.GetValue(i.Straat) == s } -func FilterRuntimeminutesStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Runtimeminutes, s) + +// getter Straat +func GettersStraat(i *Item) string { + return Straat.GetValue(i.Straat) } -func FilterGenresStartsWith(i *Item, s string) bool { - return strings.HasPrefix(i.Genres, s) + +// contain filter Postcode +func FilterPostcodeContains(i *Item, s string) bool { + return strings.Contains(Postcode.GetValue(i.Postcode), s) } -// match filters -func FilterTconstMatch(i *Item, s string) bool { - return i.Tconst == s +// startswith filter Postcode +func FilterPostcodeStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Postcode.GetValue(i.Postcode), s) } -func FilterTitletypeMatch(i *Item, s string) bool { - return i.Titletype == s + +// match filters Postcode +func FilterPostcodeMatch(i *Item, s string) bool { + return Postcode.GetValue(i.Postcode) == s } -func FilterPrimarytitleMatch(i *Item, s string) bool { - return i.Primarytitle == s + +// getter Postcode +func GettersPostcode(i *Item) string { + return Postcode.GetValue(i.Postcode) } -func FilterOriginaltitleMatch(i *Item, s string) bool { - return i.Originaltitle == s + +// contain filter Huisnummer +func FilterHuisnummerContains(i *Item, s string) bool { + return strings.Contains(Huisnummer.GetValue(i.Huisnummer), s) } -func FilterIsadultMatch(i *Item, s string) bool { - return i.Isadult == s + +// startswith filter 
Huisnummer +func FilterHuisnummerStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Huisnummer.GetValue(i.Huisnummer), s) } -func FilterStartyearMatch(i *Item, s string) bool { - return i.Startyear == s + +// match filters Huisnummer +func FilterHuisnummerMatch(i *Item, s string) bool { + return Huisnummer.GetValue(i.Huisnummer) == s } -func FilterEndyearMatch(i *Item, s string) bool { - return i.Endyear == s + +// getter Huisnummer +func GettersHuisnummer(i *Item) string { + return Huisnummer.GetValue(i.Huisnummer) } -func FilterRuntimeminutesMatch(i *Item, s string) bool { - return i.Runtimeminutes == s + +// contain filter Huisletter +func FilterHuisletterContains(i *Item, s string) bool { + return strings.Contains(Huisletter.GetValue(i.Huisletter), s) } -func FilterGenresMatch(i *Item, s string) bool { - return i.Genres == s + +// startswith filter Huisletter +func FilterHuisletterStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Huisletter.GetValue(i.Huisletter), s) } -// reduce functions +// match filters Huisletter +func FilterHuisletterMatch(i *Item, s string) bool { + return Huisletter.GetValue(i.Huisletter) == s +} -func reduceCount(items Items) map[string]string { - result := make(map[string]string) - result["count"] = strconv.Itoa(len(items)) - return result +// getter Huisletter +func GettersHuisletter(i *Item) string { + return Huisletter.GetValue(i.Huisletter) } -// getters -func GettersTconst(i *Item) string { - return i.Tconst +// contain filter Huisnummertoevoeging +func FilterHuisnummertoevoegingContains(i *Item, s string) bool { + return strings.Contains(Huisnummertoevoeging.GetValue(i.Huisnummertoevoeging), s) +} + +// startswith filter Huisnummertoevoeging +func FilterHuisnummertoevoegingStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Huisnummertoevoeging.GetValue(i.Huisnummertoevoeging), s) +} + +// match filters Huisnummertoevoeging +func FilterHuisnummertoevoegingMatch(i *Item, s string) bool { + return Huisnummertoevoeging.GetValue(i.Huisnummertoevoeging) == s +} + +// getter Huisnummertoevoeging +func GettersHuisnummertoevoeging(i *Item) string { + return Huisnummertoevoeging.GetValue(i.Huisnummertoevoeging) +} + +// contain filter Oppervlakte +func FilterOppervlakteContains(i *Item, s string) bool { + return strings.Contains(Oppervlakte.GetValue(i.Oppervlakte), s) +} + +// startswith filter Oppervlakte +func FilterOppervlakteStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Oppervlakte.GetValue(i.Oppervlakte), s) +} + +// match filters Oppervlakte +func FilterOppervlakteMatch(i *Item, s string) bool { + return Oppervlakte.GetValue(i.Oppervlakte) == s +} + +// getter Oppervlakte +func GettersOppervlakte(i *Item) string { + return Oppervlakte.GetValue(i.Oppervlakte) +} + +// contain filter Woningequivalent +func FilterWoningequivalentContains(i *Item, s string) bool { + return strings.Contains(Woningequivalent.GetValue(i.Woningequivalent), s) +} + +// startswith filter Woningequivalent +func FilterWoningequivalentStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Woningequivalent.GetValue(i.Woningequivalent), s) +} + +// match filters Woningequivalent +func FilterWoningequivalentMatch(i *Item, s string) bool { + return Woningequivalent.GetValue(i.Woningequivalent) == s +} + +// getter Woningequivalent +func GettersWoningequivalent(i *Item) string { + return Woningequivalent.GetValue(i.Woningequivalent) +} + +/* +// contain filter Adres +func FilterAdresContains(i *Item, s string) bool { + return 
strings.Contains(i.Adres, s) +} + +// startswith filter Adres +func FilterAdresStartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.Adres, s) +} + +// match filters Adres +func FilterAdresMatch(i *Item, s string) bool { + return i.Adres == s +} + +// getter Adres +func GettersAdres(i *Item) string { + return i.Adres +} +*/ + +// contain filter WoningType +func FilterWoningTypeContains(i *Item, s string) bool { + return strings.Contains(WoningType.GetValue(i.WoningType), s) +} + +// startswith filter WoningType +func FilterWoningTypeStartsWith(i *Item, s string) bool { + return strings.HasPrefix(WoningType.GetValue(i.WoningType), s) +} + +// match filters WoningType +func FilterWoningTypeMatch(i *Item, s string) bool { + return WoningType.GetValue(i.WoningType) == s +} + +// getter WoningType +func GettersWoningType(i *Item) string { + return WoningType.GetValue(i.WoningType) +} + +// contain filter LabelscoreVoorlopig +func FilterLabelscoreVoorlopigContains(i *Item, s string) bool { + return strings.Contains(LabelscoreVoorlopig.GetValue(i.LabelscoreVoorlopig), s) +} + +// startswith filter LabelscoreVoorlopig +func FilterLabelscoreVoorlopigStartsWith(i *Item, s string) bool { + return strings.HasPrefix(LabelscoreVoorlopig.GetValue(i.LabelscoreVoorlopig), s) +} + +// match filters LabelscoreVoorlopig +func FilterLabelscoreVoorlopigMatch(i *Item, s string) bool { + return LabelscoreVoorlopig.GetValue(i.LabelscoreVoorlopig) == s +} + +// getter LabelscoreVoorlopig +func GettersLabelscoreVoorlopig(i *Item) string { + return LabelscoreVoorlopig.GetValue(i.LabelscoreVoorlopig) +} + +// contain filter LabelscoreDefinitief +func FilterLabelscoreDefinitiefContains(i *Item, s string) bool { + return strings.Contains(LabelscoreDefinitief.GetValue(i.LabelscoreDefinitief), s) +} + +// startswith filter LabelscoreDefinitief +func FilterLabelscoreDefinitiefStartsWith(i *Item, s string) bool { + return strings.HasPrefix(LabelscoreDefinitief.GetValue(i.LabelscoreDefinitief), s) +} + +// match filters LabelscoreDefinitief +func FilterLabelscoreDefinitiefMatch(i *Item, s string) bool { + return LabelscoreDefinitief.GetValue(i.LabelscoreDefinitief) == s +} + +// getter LabelscoreDefinitief +func GettersLabelscoreDefinitief(i *Item) string { + return LabelscoreDefinitief.GetValue(i.LabelscoreDefinitief) +} + +// contain filter Energieklasse +func FilterEnergieklasseContains(i *Item, s string) bool { + return strings.Contains(Energieklasse.GetValue(i.Energieklasse), s) +} + +// startswith filter Energieklasse +func FilterEnergieklasseStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Energieklasse.GetValue(i.Energieklasse), s) +} + +// match filters Energieklasse +func FilterEnergieklasseMatch(i *Item, s string) bool { + return Energieklasse.GetValue(i.Energieklasse) == s +} + +// getter Energieklasse +func GettersEnergieklasse(i *Item) string { + return Energieklasse.GetValue(i.Energieklasse) +} + +// contain filter Gemeentecode +func FilterGemeentecodeContains(i *Item, s string) bool { + return strings.Contains(Gemeentecode.GetValue(i.Gemeentecode), s) +} + +// startswith filter Gemeentecode +func FilterGemeentecodeStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Gemeentecode.GetValue(i.Gemeentecode), s) +} + +// match filters Gemeentecode +func FilterGemeentecodeMatch(i *Item, s string) bool { + return Gemeentecode.GetValue(i.Gemeentecode) == s +} + +// getter Gemeentecode +func GettersGemeentecode(i *Item) string { + return Gemeentecode.GetValue(i.Gemeentecode) +} + 
+// contain filter Gemeentenaam +func FilterGemeentenaamContains(i *Item, s string) bool { + return strings.Contains(Gemeentenaam.GetValue(i.Gemeentenaam), s) +} + +// startswith filter Gemeentenaam +func FilterGemeentenaamStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Gemeentenaam.GetValue(i.Gemeentenaam), s) +} + +// match filters Gemeentenaam +func FilterGemeentenaamMatch(i *Item, s string) bool { + return Gemeentenaam.GetValue(i.Gemeentenaam) == s +} + +// getter Gemeentenaam +func GettersGemeentenaam(i *Item) string { + return Gemeentenaam.GetValue(i.Gemeentenaam) +} + +// contain filter Buurtcode +func FilterBuurtcodeContains(i *Item, s string) bool { + return strings.Contains(Buurtcode.GetValue(i.Buurtcode), s) +} + +// startswith filter Buurtcode +func FilterBuurtcodeStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Buurtcode.GetValue(i.Buurtcode), s) +} + +// match filters Buurtcode +func FilterBuurtcodeMatch(i *Item, s string) bool { + return Buurtcode.GetValue(i.Buurtcode) == s +} + +// getter Buurtcode +func GettersBuurtcode(i *Item) string { + return Buurtcode.GetValue(i.Buurtcode) +} + +// contain filter Buurtnaam +func FilterBuurtnaamContains(i *Item, s string) bool { + return strings.Contains(Buurtnaam.GetValue(i.Buurtnaam), s) +} + +// startswith filter Buurtnaam +func FilterBuurtnaamStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Buurtnaam.GetValue(i.Buurtnaam), s) +} + +// match filters Buurtnaam +func FilterBuurtnaamMatch(i *Item, s string) bool { + return Buurtnaam.GetValue(i.Buurtnaam) == s +} + +// getter Buurtnaam +func GettersBuurtnaam(i *Item) string { + return Buurtnaam.GetValue(i.Buurtnaam) +} + +// contain filter Wijkcode +func FilterWijkcodeContains(i *Item, s string) bool { + return strings.Contains(Wijkcode.GetValue(i.Wijkcode), s) +} + +// startswith filter Wijkcode +func FilterWijkcodeStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Wijkcode.GetValue(i.Wijkcode), s) +} + +// match filters Wijkcode +func FilterWijkcodeMatch(i *Item, s string) bool { + return Wijkcode.GetValue(i.Wijkcode) == s +} + +// getter Wijkcode +func GettersWijkcode(i *Item) string { + return Wijkcode.GetValue(i.Wijkcode) +} + +// contain filter Wijknaam +func FilterWijknaamContains(i *Item, s string) bool { + return strings.Contains(Wijknaam.GetValue(i.Wijknaam), s) +} + +// startswith filter Wijknaam +func FilterWijknaamStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Wijknaam.GetValue(i.Wijknaam), s) +} + +// match filters Wijknaam +func FilterWijknaamMatch(i *Item, s string) bool { + return Wijknaam.GetValue(i.Wijknaam) == s +} + +// getter Wijknaam +func GettersWijknaam(i *Item) string { + return Wijknaam.GetValue(i.Wijknaam) } -func GettersTitletype(i *Item) string { - return i.Titletype + +// contain filter Provinciecode +func FilterProvinciecodeContains(i *Item, s string) bool { + return strings.Contains(Provinciecode.GetValue(i.Provinciecode), s) +} + +// startswith filter Provinciecode +func FilterProvinciecodeStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Provinciecode.GetValue(i.Provinciecode), s) +} + +// match filters Provinciecode +func FilterProvinciecodeMatch(i *Item, s string) bool { + return Provinciecode.GetValue(i.Provinciecode) == s +} + +// getter Provinciecode +func GettersProvinciecode(i *Item) string { + return Provinciecode.GetValue(i.Provinciecode) +} + +// contain filter Provincienaam +func FilterProvincienaamContains(i *Item, s string) bool { + return 
strings.Contains(Provincienaam.GetValue(i.Provincienaam), s) +} + +// startswith filter Provincienaam +func FilterProvincienaamStartsWith(i *Item, s string) bool { + return strings.HasPrefix(Provincienaam.GetValue(i.Provincienaam), s) +} + +// match filters Provincienaam +func FilterProvincienaamMatch(i *Item, s string) bool { + return Provincienaam.GetValue(i.Provincienaam) == s +} + +// getter Provincienaam +func GettersProvincienaam(i *Item) string { + return Provincienaam.GetValue(i.Provincienaam) +} + +// contain filter Point +func FilterPointContains(i *Item, s string) bool { + return strings.Contains(i.Point, s) +} + +// startswith filter Point +func FilterPointStartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.Point, s) +} + +// match filters Point +func FilterPointMatch(i *Item, s string) bool { + return i.Point == s +} + +// getter Point +func GettersPoint(i *Item) string { + return i.Point +} + +// contain filter PandGasEanAansluitingen +func FilterPandGasEanAansluitingenContains(i *Item, s string) bool { + return strings.Contains(PandGasEanAansluitingen.GetValue(i.PandGasEanAansluitingen), s) +} + +// startswith filter PandGasEanAansluitingen +func FilterPandGasEanAansluitingenStartsWith(i *Item, s string) bool { + return strings.HasPrefix(PandGasEanAansluitingen.GetValue(i.PandGasEanAansluitingen), s) +} + +// match filters PandGasEanAansluitingen +func FilterPandGasEanAansluitingenMatch(i *Item, s string) bool { + return PandGasEanAansluitingen.GetValue(i.PandGasEanAansluitingen) == s +} + +// getter PandGasEanAansluitingen +func GettersPandGasEanAansluitingen(i *Item) string { + return PandGasEanAansluitingen.GetValue(i.PandGasEanAansluitingen) +} + +// contain filter GroupId2020 +func FilterGroupId2020Contains(i *Item, s string) bool { + return strings.Contains(i.GroupId2020, s) +} + +// startswith filter GroupId2020 +func FilterGroupId2020StartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.GroupId2020, s) +} + +// match filters GroupId2020 +func FilterGroupId2020Match(i *Item, s string) bool { + return i.GroupId2020 == s +} + +// getter GroupId2020 +func GettersGroupId2020(i *Item) string { + return i.GroupId2020 +} + +// contain filter P6GasAansluitingen2020 +func FilterP6GasAansluitingen2020Contains(i *Item, s string) bool { + return strings.Contains(P6GasAansluitingen2020.GetValue(i.P6GasAansluitingen2020), s) +} + +// startswith filter P6GasAansluitingen2020 +func FilterP6GasAansluitingen2020StartsWith(i *Item, s string) bool { + return strings.HasPrefix(P6GasAansluitingen2020.GetValue(i.P6GasAansluitingen2020), s) +} + +// match filters P6GasAansluitingen2020 +func FilterP6GasAansluitingen2020Match(i *Item, s string) bool { + return P6GasAansluitingen2020.GetValue(i.P6GasAansluitingen2020) == s +} + +// getter P6GasAansluitingen2020 +func GettersP6GasAansluitingen2020(i *Item) string { + return P6GasAansluitingen2020.GetValue(i.P6GasAansluitingen2020) +} + +// contain filter P6Gasm32020 +func FilterP6Gasm32020Contains(i *Item, s string) bool { + return strings.Contains(P6Gasm32020.GetValue(i.P6Gasm32020), s) +} + +// startswith filter P6Gasm32020 +func FilterP6Gasm32020StartsWith(i *Item, s string) bool { + return strings.HasPrefix(P6Gasm32020.GetValue(i.P6Gasm32020), s) +} + +// match filters P6Gasm32020 +func FilterP6Gasm32020Match(i *Item, s string) bool { + return P6Gasm32020.GetValue(i.P6Gasm32020) == s +} + +// getter P6Gasm32020 +func GettersP6Gasm32020(i *Item) string { + return P6Gasm32020.GetValue(i.P6Gasm32020) +} + +// contain 
filter P6Kwh2020 +func FilterP6Kwh2020Contains(i *Item, s string) bool { + return strings.Contains(P6Kwh2020.GetValue(i.P6Kwh2020), s) +} + +// startswith filter P6Kwh2020 +func FilterP6Kwh2020StartsWith(i *Item, s string) bool { + return strings.HasPrefix(P6Kwh2020.GetValue(i.P6Kwh2020), s) +} + +// match filters P6Kwh2020 +func FilterP6Kwh2020Match(i *Item, s string) bool { + return P6Kwh2020.GetValue(i.P6Kwh2020) == s +} + +// getter P6Kwh2020 +func GettersP6Kwh2020(i *Item) string { + return P6Kwh2020.GetValue(i.P6Kwh2020) +} + +// contain filter P6TotaalPandoppervlakM2 +func FilterP6TotaalPandoppervlakM2Contains(i *Item, s string) bool { + return strings.Contains(P6TotaalPandoppervlakM2.GetValue(i.P6TotaalPandoppervlakM2), s) +} + +// startswith filter P6TotaalPandoppervlakM2 +func FilterP6TotaalPandoppervlakM2StartsWith(i *Item, s string) bool { + return strings.HasPrefix(P6TotaalPandoppervlakM2.GetValue(i.P6TotaalPandoppervlakM2), s) +} + +// match filters P6TotaalPandoppervlakM2 +func FilterP6TotaalPandoppervlakM2Match(i *Item, s string) bool { + return P6TotaalPandoppervlakM2.GetValue(i.P6TotaalPandoppervlakM2) == s +} + +// getter P6TotaalPandoppervlakM2 +func GettersP6TotaalPandoppervlakM2(i *Item) string { + return P6TotaalPandoppervlakM2.GetValue(i.P6TotaalPandoppervlakM2) +} + +// contain filter PandBouwjaar +func FilterPandBouwjaarContains(i *Item, s string) bool { + return strings.Contains(PandBouwjaar.GetValue(i.PandBouwjaar), s) +} + +// startswith filter PandBouwjaar +func FilterPandBouwjaarStartsWith(i *Item, s string) bool { + return strings.HasPrefix(PandBouwjaar.GetValue(i.PandBouwjaar), s) +} + +// match filters PandBouwjaar +func FilterPandBouwjaarMatch(i *Item, s string) bool { + return PandBouwjaar.GetValue(i.PandBouwjaar) == s +} + +// getter PandBouwjaar +func GettersPandBouwjaar(i *Item) string { + return PandBouwjaar.GetValue(i.PandBouwjaar) +} + +// contain filter PandGasAansluitingen +func FilterPandGasAansluitingenContains(i *Item, s string) bool { + return strings.Contains(PandGasAansluitingen.GetValue(i.PandGasAansluitingen), s) +} + +// startswith filter PandGasAansluitingen +func FilterPandGasAansluitingenStartsWith(i *Item, s string) bool { + return strings.HasPrefix(PandGasAansluitingen.GetValue(i.PandGasAansluitingen), s) +} + +// match filters PandGasAansluitingen +func FilterPandGasAansluitingenMatch(i *Item, s string) bool { + return PandGasAansluitingen.GetValue(i.PandGasAansluitingen) == s +} + +// getter PandGasAansluitingen +func GettersPandGasAansluitingen(i *Item) string { + return PandGasAansluitingen.GetValue(i.PandGasAansluitingen) +} + +// contain filter Gebruiksdoelen +func FilterGebruiksdoelenContains(i *Item, s string) bool { + for _, v := range i.Gebruiksdoelen { + vs := Gebruiksdoelen.GetValue(v) + if strings.Contains(vs, s) { + return true + } + } + return false +} + +// startswith filter Gebruiksdoelen +func FilterGebruiksdoelenStartsWith(i *Item, s string) bool { + for _, v := range i.Gebruiksdoelen { + vs := Gebruiksdoelen.GetValue(v) + if strings.HasPrefix(vs, s) { + return true + } + } + return false + } -func GettersPrimarytitle(i *Item) string { - return i.Primarytitle + +// match filters Gebruiksdoelen +func FilterGebruiksdoelenMatch(i *Item, s string) bool { + for _, v := range i.Gebruiksdoelen { + vs := Gebruiksdoelen.GetValue(v) + if vs == s { + return true + } + } + return false } -func GettersOriginaltitle(i *Item) string { - return i.Originaltitle + +// getter Gebruiksdoelen +func GettersGebruiksdoelen(i *Item) 
string { + return Gebruiksdoelen.GetArrayValue(i.Gebruiksdoelen) } -func GettersIsadult(i *Item) string { - return i.Isadult + +/* +// contain filters +func FilterEkeyContains(i *Item, s string) bool { + return strings.Contains(i.Ekey, s) } -func GettersStartyear(i *Item) string { - return i.Startyear + + +// startswith filters +func FilterEkeyStartsWith(i *Item, s string) bool { + return strings.HasPrefix(i.Ekey, s) } -func GettersEndyear(i *Item) string { - return i.Endyear + + +// match filters +func FilterEkeyMatch(i *Item, s string) bool { + return i.Ekey == s } -func GettersRuntimeminutes(i *Item) string { - return i.Runtimeminutes + +// getters +func GettersEkey(i *Item) string { + return i.Ekey } -func GettersGenres(i *Item) string { - return i.Genres +*/ + +// reduce functions +func reduceCount(items Items) map[string]string { + result := make(map[string]string) + result["count"] = strconv.Itoa(len(items)) + return result } type GroupedOperations struct { - Funcs registerFuncType - GroupBy registerGroupByFunc - Getters registerGettersMap - Reduce registerReduce + Funcs registerFuncType + GroupBy registerGroupByFunc + Getters registerGettersMap + Reduce registerReduce + BitArrays registerBitArray } var Operations GroupedOperations @@ -187,6 +1131,21 @@ var RegisterFuncMap registerFuncType var RegisterGroupBy registerGroupByFunc var RegisterGetters registerGettersMap var RegisterReduce registerReduce +var RegisterBitArray registerBitArray + +// ValidateRegsiters validate exposed columns do match filter names +func validateRegisters() error { + var i = ItemOut{} + var filters = []string{"match", "contains", "startswith"} + for _, c := range i.Columns() { + for _, f := range filters { + if _, ok := RegisterFuncMap[f+"-"+c]; !ok { + return errors.New(c + " is missing in RegisterMap") + } + } + } + return nil +} func init() { @@ -195,97 +1154,478 @@ func init() { RegisterGetters = make(registerGettersMap) RegisterReduce = make(registerReduce) - // register match filters - - RegisterFuncMap["match-tconst"] = FilterTconstMatch - RegisterFuncMap["match-titletype"] = FilterTitletypeMatch - RegisterFuncMap["match-primarytitle"] = FilterPrimarytitleMatch - RegisterFuncMap["match-originaltitle"] = FilterOriginaltitleMatch - RegisterFuncMap["match-isadult"] = FilterIsadultMatch - RegisterFuncMap["match-startyear"] = FilterStartyearMatch - RegisterFuncMap["match-endyear"] = FilterEndyearMatch - RegisterFuncMap["match-runtimeminutes"] = FilterRuntimeminutesMatch - RegisterFuncMap["match-genres"] = FilterGenresMatch - - // register contains filters - RegisterFuncMap["contains-tconst"] = FilterTconstContains - RegisterFuncMap["contains-titletype"] = FilterTitletypeContains - RegisterFuncMap["contains-primarytitle"] = FilterPrimarytitleContains - RegisterFuncMap["contains-originaltitle"] = FilterOriginaltitleContains - RegisterFuncMap["contains-isadult"] = FilterIsadultContains - RegisterFuncMap["contains-startyear"] = FilterStartyearContains - RegisterFuncMap["contains-endyear"] = FilterEndyearContains - RegisterFuncMap["contains-runtimeminutes"] = FilterRuntimeminutesContains - RegisterFuncMap["contains-genres"] = FilterGenresContains - - // register startswith filters - RegisterFuncMap["startswith-tconst"] = FilterTconstStartsWith - RegisterFuncMap["startswith-titletype"] = FilterTitletypeStartsWith - RegisterFuncMap["startswith-primarytitle"] = FilterPrimarytitleStartsWith - RegisterFuncMap["startswith-originaltitle"] = FilterOriginaltitleStartsWith - RegisterFuncMap["startswith-isadult"] = 
FilterIsadultStartsWith - RegisterFuncMap["startswith-startyear"] = FilterStartyearStartsWith - RegisterFuncMap["startswith-endyear"] = FilterEndyearStartsWith - RegisterFuncMap["startswith-runtimeminutes"] = FilterRuntimeminutesStartsWith - RegisterFuncMap["startswith-genres"] = FilterGenresStartsWith - - // register getters - RegisterGetters["tconst"] = GettersTconst - RegisterGetters["titletype"] = GettersTitletype - RegisterGetters["primarytitle"] = GettersPrimarytitle - RegisterGetters["originaltitle"] = GettersOriginaltitle - RegisterGetters["isadult"] = GettersIsadult - RegisterGetters["startyear"] = GettersStartyear - RegisterGetters["endyear"] = GettersEndyear - RegisterGetters["runtimeminutes"] = GettersRuntimeminutes - RegisterGetters["genres"] = GettersGenres - - // register groupby - RegisterGroupBy["tconst"] = GettersTconst - RegisterGroupBy["titletype"] = GettersTitletype - RegisterGroupBy["primarytitle"] = GettersPrimarytitle - RegisterGroupBy["originaltitle"] = GettersOriginaltitle - RegisterGroupBy["isadult"] = GettersIsadult - RegisterGroupBy["startyear"] = GettersStartyear - RegisterGroupBy["endyear"] = GettersEndyear - RegisterGroupBy["runtimeminutes"] = GettersRuntimeminutes - RegisterGroupBy["genres"] = GettersGenres + // register search filter. + //RegisterFuncMap["search"] = 'EDITYOURSELF' + // example RegisterFuncMap["search"] = FilterEkeyStartsWith + + //RegisterFuncMap["value"] = 'EDITYOURSELF' + RegisterGetters["value"] = GettersGemeentecode + + // register filters + + //register filters for Pid + RegisterFuncMap["match-pid"] = FilterPidMatch + RegisterFuncMap["contains-pid"] = FilterPidContains + RegisterFuncMap["startswith-pid"] = FilterPidStartsWith + RegisterGetters["pid"] = GettersPid + RegisterGroupBy["pid"] = GettersPid + + //register filters for Vid + RegisterFuncMap["match-vid"] = FilterVidMatch + RegisterFuncMap["contains-vid"] = FilterVidContains + RegisterFuncMap["startswith-vid"] = FilterVidStartsWith + RegisterGetters["vid"] = GettersVid + RegisterGroupBy["vid"] = GettersVid + + //register filters for Numid + RegisterFuncMap["match-numid"] = FilterNumidMatch + RegisterFuncMap["contains-numid"] = FilterNumidContains + RegisterFuncMap["startswith-numid"] = FilterNumidStartsWith + RegisterGetters["numid"] = GettersNumid + RegisterGroupBy["numid"] = GettersNumid + + //register filters for Straat + RegisterFuncMap["match-straat"] = FilterStraatMatch + RegisterFuncMap["contains-straat"] = FilterStraatContains + RegisterFuncMap["startswith-straat"] = FilterStraatStartsWith + RegisterGetters["straat"] = GettersStraat + RegisterGroupBy["straat"] = GettersStraat + + //register filters for Postcode + RegisterFuncMap["match-postcode"] = FilterPostcodeMatch + RegisterFuncMap["contains-postcode"] = FilterPostcodeContains + RegisterFuncMap["startswith-postcode"] = FilterPostcodeStartsWith + RegisterGetters["postcode"] = GettersPostcode + RegisterGroupBy["postcode"] = GettersPostcode + + //register filters for Huisnummer + RegisterFuncMap["match-huisnummer"] = FilterHuisnummerMatch + RegisterFuncMap["contains-huisnummer"] = FilterHuisnummerContains + RegisterFuncMap["startswith-huisnummer"] = FilterHuisnummerStartsWith + RegisterGetters["huisnummer"] = GettersHuisnummer + RegisterGroupBy["huisnummer"] = GettersHuisnummer + + //register filters for Huisletter + RegisterFuncMap["match-huisletter"] = FilterHuisletterMatch + RegisterFuncMap["contains-huisletter"] = FilterHuisletterContains + RegisterFuncMap["startswith-huisletter"] = FilterHuisletterStartsWith + 
RegisterGetters["huisletter"] = GettersHuisletter + RegisterGroupBy["huisletter"] = GettersHuisletter + + //register filters for Huisnummertoevoeging + RegisterFuncMap["match-huisnummertoevoeging"] = FilterHuisnummertoevoegingMatch + RegisterFuncMap["contains-huisnummertoevoeging"] = FilterHuisnummertoevoegingContains + RegisterFuncMap["startswith-huisnummertoevoeging"] = FilterHuisnummertoevoegingStartsWith + RegisterGetters["huisnummertoevoeging"] = GettersHuisnummertoevoeging + RegisterGroupBy["huisnummertoevoeging"] = GettersHuisnummertoevoeging + + //register filters for Oppervlakte + RegisterFuncMap["match-oppervlakte"] = FilterOppervlakteMatch + RegisterFuncMap["contains-oppervlakte"] = FilterOppervlakteContains + RegisterFuncMap["startswith-oppervlakte"] = FilterOppervlakteStartsWith + RegisterGetters["oppervlakte"] = GettersOppervlakte + RegisterGroupBy["oppervlakte"] = GettersOppervlakte + + //register filters for Woningequivalent + RegisterFuncMap["match-woningequivalent"] = FilterWoningequivalentMatch + RegisterFuncMap["contains-woningequivalent"] = FilterWoningequivalentContains + RegisterFuncMap["startswith-woningequivalent"] = FilterWoningequivalentStartsWith + RegisterGetters["woningequivalent"] = GettersWoningequivalent + RegisterGroupBy["woningequivalent"] = GettersWoningequivalent + //register filters for WoningType + RegisterFuncMap["match-woning_type"] = FilterWoningTypeMatch + RegisterFuncMap["contains-woning_type"] = FilterWoningTypeContains + RegisterFuncMap["startswith-woning_type"] = FilterWoningTypeStartsWith + RegisterGetters["woning_type"] = GettersWoningType + RegisterGroupBy["woning_type"] = GettersWoningType + + //register filters for LabelscoreVoorlopig + RegisterFuncMap["match-labelscore_voorlopig"] = FilterLabelscoreVoorlopigMatch + RegisterFuncMap["contains-labelscore_voorlopig"] = FilterLabelscoreVoorlopigContains + RegisterFuncMap["startswith-labelscore_voorlopig"] = FilterLabelscoreVoorlopigStartsWith + RegisterGetters["labelscore_voorlopig"] = GettersLabelscoreVoorlopig + RegisterGroupBy["labelscore_voorlopig"] = GettersLabelscoreVoorlopig + + //register filters for LabelscoreDefinitief + RegisterFuncMap["match-labelscore_definitief"] = FilterLabelscoreDefinitiefMatch + RegisterFuncMap["contains-labelscore_definitief"] = FilterLabelscoreDefinitiefContains + RegisterFuncMap["startswith-labelscore_definitief"] = FilterLabelscoreDefinitiefStartsWith + RegisterGetters["labelscore_definitief"] = GettersLabelscoreDefinitief + RegisterGroupBy["labelscore_definitief"] = GettersLabelscoreDefinitief + + //register filters for Energieklasse + RegisterFuncMap["match-energieklasse"] = FilterEnergieklasseMatch + RegisterFuncMap["contains-energieklasse"] = FilterEnergieklasseContains + RegisterFuncMap["startswith-energieklasse"] = FilterEnergieklasseStartsWith + RegisterGetters["energieklasse"] = GettersEnergieklasse + RegisterGroupBy["energieklasse"] = GettersEnergieklasse + + //register filters for Gemeentecode + RegisterFuncMap["match-gemeentecode"] = FilterGemeentecodeMatch + RegisterFuncMap["contains-gemeentecode"] = FilterGemeentecodeContains + RegisterFuncMap["startswith-gemeentecode"] = FilterGemeentecodeStartsWith + RegisterGetters["gemeentecode"] = GettersGemeentecode + RegisterGroupBy["gemeentecode"] = GettersGemeentecode + + //register filters for Gemeentenaam + RegisterFuncMap["match-gemeentenaam"] = FilterGemeentenaamMatch + RegisterFuncMap["contains-gemeentenaam"] = FilterGemeentenaamContains + RegisterFuncMap["startswith-gemeentenaam"] = 
FilterGemeentenaamStartsWith + RegisterGetters["gemeentenaam"] = GettersGemeentenaam + RegisterGroupBy["gemeentenaam"] = GettersGemeentenaam + + //register filters for Buurtcode + RegisterFuncMap["match-buurtcode"] = FilterBuurtcodeMatch + RegisterFuncMap["contains-buurtcode"] = FilterBuurtcodeContains + RegisterFuncMap["startswith-buurtcode"] = FilterBuurtcodeStartsWith + RegisterGetters["buurtcode"] = GettersBuurtcode + RegisterGroupBy["buurtcode"] = GettersBuurtcode + + //register filters for Buurtnaam + RegisterFuncMap["match-buurtnaam"] = FilterBuurtnaamMatch + RegisterFuncMap["contains-buurtnaam"] = FilterBuurtnaamContains + RegisterFuncMap["startswith-buurtnaam"] = FilterBuurtnaamStartsWith + RegisterGetters["buurtnaam"] = GettersBuurtnaam + RegisterGroupBy["buurtnaam"] = GettersBuurtnaam + + //register filters for Wijkcode + RegisterFuncMap["match-wijkcode"] = FilterWijkcodeMatch + RegisterFuncMap["contains-wijkcode"] = FilterWijkcodeContains + RegisterFuncMap["startswith-wijkcode"] = FilterWijkcodeStartsWith + RegisterGetters["wijkcode"] = GettersWijkcode + RegisterGroupBy["wijkcode"] = GettersWijkcode + + //register filters for Wijknaam + RegisterFuncMap["match-wijknaam"] = FilterWijknaamMatch + RegisterFuncMap["contains-wijknaam"] = FilterWijknaamContains + RegisterFuncMap["startswith-wijknaam"] = FilterWijknaamStartsWith + RegisterGetters["wijknaam"] = GettersWijknaam + RegisterGroupBy["wijknaam"] = GettersWijknaam + + //register filters for Provinciecode + RegisterFuncMap["match-provinciecode"] = FilterProvinciecodeMatch + RegisterFuncMap["contains-provinciecode"] = FilterProvinciecodeContains + RegisterFuncMap["startswith-provinciecode"] = FilterProvinciecodeStartsWith + RegisterGetters["provinciecode"] = GettersProvinciecode + RegisterGroupBy["provinciecode"] = GettersProvinciecode + + //register filters for Provincienaam + RegisterFuncMap["match-provincienaam"] = FilterProvincienaamMatch + RegisterFuncMap["contains-provincienaam"] = FilterProvincienaamContains + RegisterFuncMap["startswith-provincienaam"] = FilterProvincienaamStartsWith + RegisterGetters["provincienaam"] = GettersProvincienaam + RegisterGroupBy["provincienaam"] = GettersProvincienaam + + //register filters for Point + RegisterFuncMap["match-point"] = FilterPointMatch + RegisterFuncMap["contains-point"] = FilterPointContains + RegisterFuncMap["startswith-point"] = FilterPointStartsWith + RegisterGetters["point"] = GettersPoint + RegisterGroupBy["point"] = GettersPoint + + //register filters for PandGasEanAansluitingen + RegisterFuncMap["match-pand_gas_ean_aansluitingen"] = FilterPandGasEanAansluitingenMatch + RegisterFuncMap["contains-pand_gas_ean_aansluitingen"] = FilterPandGasEanAansluitingenContains + RegisterFuncMap["startswith-pand_gas_ean_aansluitingen"] = FilterPandGasEanAansluitingenStartsWith + RegisterGetters["pand_gas_ean_aansluitingen"] = GettersPandGasEanAansluitingen + RegisterGroupBy["pand_gas_ean_aansluitingen"] = GettersPandGasEanAansluitingen + + //register filters for GroupId2020 + RegisterFuncMap["match-group_id_2020"] = FilterGroupId2020Match + RegisterFuncMap["contains-group_id_2020"] = FilterGroupId2020Contains + RegisterFuncMap["startswith-group_id_2020"] = FilterGroupId2020StartsWith + RegisterGetters["group_id_2020"] = GettersGroupId2020 + RegisterGroupBy["group_id_2020"] = GettersGroupId2020 + + //register filters for P6GasAansluitingen2020 + RegisterFuncMap["match-p6_gas_aansluitingen_2020"] = FilterP6GasAansluitingen2020Match + 
RegisterFuncMap["contains-p6_gas_aansluitingen_2020"] = FilterP6GasAansluitingen2020Contains + RegisterFuncMap["startswith-p6_gas_aansluitingen_2020"] = FilterP6GasAansluitingen2020StartsWith + RegisterGetters["p6_gas_aansluitingen_2020"] = GettersP6GasAansluitingen2020 + RegisterGroupBy["p6_gas_aansluitingen_2020"] = GettersP6GasAansluitingen2020 + + //register filters for P6Gasm32020 + RegisterFuncMap["match-p6_gasm3_2020"] = FilterP6Gasm32020Match + RegisterFuncMap["contains-p6_gasm3_2020"] = FilterP6Gasm32020Contains + RegisterFuncMap["startswith-p6_gasm3_2020"] = FilterP6Gasm32020StartsWith + RegisterGetters["p6_gasm3_2020"] = GettersP6Gasm32020 + RegisterGroupBy["p6_gasm3_2020"] = GettersP6Gasm32020 + + //register filters for P6Kwh2020 + RegisterFuncMap["match-p6_kwh_2020"] = FilterP6Kwh2020Match + RegisterFuncMap["contains-p6_kwh_2020"] = FilterP6Kwh2020Contains + RegisterFuncMap["startswith-p6_kwh_2020"] = FilterP6Kwh2020StartsWith + RegisterGetters["p6_kwh_2020"] = GettersP6Kwh2020 + RegisterGroupBy["p6_kwh_2020"] = GettersP6Kwh2020 + + //register filters for P6TotaalPandoppervlakM2 + RegisterFuncMap["match-p6_totaal_pandoppervlak_m2"] = FilterP6TotaalPandoppervlakM2Match + RegisterFuncMap["contains-p6_totaal_pandoppervlak_m2"] = FilterP6TotaalPandoppervlakM2Contains + RegisterFuncMap["startswith-p6_totaal_pandoppervlak_m2"] = FilterP6TotaalPandoppervlakM2StartsWith + RegisterGetters["p6_totaal_pandoppervlak_m2"] = GettersP6TotaalPandoppervlakM2 + RegisterGroupBy["p6_totaal_pandoppervlak_m2"] = GettersP6TotaalPandoppervlakM2 + + //register filters for PandBouwjaar + RegisterFuncMap["match-pand_bouwjaar"] = FilterPandBouwjaarMatch + RegisterFuncMap["contains-pand_bouwjaar"] = FilterPandBouwjaarContains + RegisterFuncMap["startswith-pand_bouwjaar"] = FilterPandBouwjaarStartsWith + RegisterGetters["pand_bouwjaar"] = GettersPandBouwjaar + RegisterGroupBy["pand_bouwjaar"] = GettersPandBouwjaar + + //register filters for PandGasAansluitingen + RegisterFuncMap["match-pand_gas_aansluitingen"] = FilterPandGasAansluitingenMatch + RegisterFuncMap["contains-pand_gas_aansluitingen"] = FilterPandGasAansluitingenContains + RegisterFuncMap["startswith-pand_gas_aansluitingen"] = FilterPandGasAansluitingenStartsWith + RegisterGetters["pand_gas_aansluitingen"] = GettersPandGasAansluitingen + RegisterGroupBy["pand_gas_aansluitingen"] = GettersPandGasAansluitingen + + //register filters for Gebruiksdoelen + RegisterFuncMap["match-gebruiksdoelen"] = FilterGebruiksdoelenMatch + RegisterFuncMap["contains-gebruiksdoelen"] = FilterGebruiksdoelenContains + RegisterFuncMap["startswith-gebruiksdoelen"] = FilterGebruiksdoelenStartsWith + RegisterGetters["gebruiksdoelen"] = GettersGebruiksdoelen + RegisterGroupBy["gebruiksdoelen"] = GettersGebruiksdoelen + + RegisterGroupBy["postcodehuisnummer"] = GettersToevoegingen + + validateRegisters() + + /* + RegisterFuncMap["match-ekey"] = FilterEkeyMatch + RegisterFuncMap["contains-ekey"] = FilterEkeyContains + // register startswith filters + RegisterFuncMap["startswith-ekey"] = FilterEkeyStartsWith + // register getters + RegisterGetters["ekey"] = GettersEkey + // register groupby + RegisterGroupBy["ekey"] = GettersEkey + + */ // register reduce functions RegisterReduce["count"] = reduceCount + RegisterReduce["woningequivalent"] = reduceWEQ } -func sortBy(items Items, sortingL []string) (Items, []string) { - sortFuncs := map[string]func(int, int) bool{"tconst": func(i, j int) bool { return items[i].Tconst < items[j].Tconst }, - "-tconst": func(i, j int) bool { return 
items[i].Tconst > items[j].Tconst }, - "titletype": func(i, j int) bool { return items[i].Titletype < items[j].Titletype }, - "-titletype": func(i, j int) bool { return items[i].Titletype > items[j].Titletype }, +type sortLookup map[string]func(int, int) bool + +func createSort(items Items) sortLookup { + + sortFuncs := sortLookup{ + + "pid": func(i, j int) bool { return Pid.GetValue(items[i].Pid) < Pid.GetValue(items[j].Pid) }, + "-pid": func(i, j int) bool { return Pid.GetValue(items[i].Pid) > Pid.GetValue(items[j].Pid) }, + + "vid": func(i, j int) bool { return Vid.GetValue(items[i].Vid) < Vid.GetValue(items[j].Vid) }, + "-vid": func(i, j int) bool { return Vid.GetValue(items[i].Vid) > Vid.GetValue(items[j].Vid) }, + + "numid": func(i, j int) bool { return items[i].Numid < items[j].Numid }, + "-numid": func(i, j int) bool { return items[i].Numid > items[j].Numid }, + + "straat": func(i, j int) bool { return Straat.GetValue(items[i].Straat) < Straat.GetValue(items[j].Straat) }, + "-straat": func(i, j int) bool { return Straat.GetValue(items[i].Straat) > Straat.GetValue(items[j].Straat) }, + + "postcode": func(i, j int) bool { + return Postcode.GetValue(items[i].Postcode) < Postcode.GetValue(items[j].Postcode) + }, + "-postcode": func(i, j int) bool { + return Postcode.GetValue(items[i].Postcode) > Postcode.GetValue(items[j].Postcode) + }, + + "huisnummer": func(i, j int) bool { + return Huisnummer.GetValue(items[i].Huisnummer) < Huisnummer.GetValue(items[j].Huisnummer) + }, + "-huisnummer": func(i, j int) bool { + return Huisnummer.GetValue(items[i].Huisnummer) > Huisnummer.GetValue(items[j].Huisnummer) + }, + + "huisletter": func(i, j int) bool { + return Huisletter.GetValue(items[i].Huisletter) < Huisletter.GetValue(items[j].Huisletter) + }, + "-huisletter": func(i, j int) bool { + return Huisletter.GetValue(items[i].Huisletter) > Huisletter.GetValue(items[j].Huisletter) + }, - "primarytitle": func(i, j int) bool { return items[i].Primarytitle < items[j].Primarytitle }, - "-primarytitle": func(i, j int) bool { return items[i].Primarytitle > items[j].Primarytitle }, + "huisnummertoevoeging": func(i, j int) bool { + return Huisnummertoevoeging.GetValue(items[i].Huisnummertoevoeging) < Huisnummertoevoeging.GetValue(items[j].Huisnummertoevoeging) + }, + "-huisnummertoevoeging": func(i, j int) bool { + return Huisnummertoevoeging.GetValue(items[i].Huisnummertoevoeging) > Huisnummertoevoeging.GetValue(items[j].Huisnummertoevoeging) + }, - "originaltitle": func(i, j int) bool { return items[i].Originaltitle < items[j].Originaltitle }, - "-originaltitle": func(i, j int) bool { return items[i].Originaltitle > items[j].Originaltitle }, + "oppervlakte": func(i, j int) bool { + return Oppervlakte.GetValue(items[i].Oppervlakte) < Oppervlakte.GetValue(items[j].Oppervlakte) + }, + "-oppervlakte": func(i, j int) bool { + return Oppervlakte.GetValue(items[i].Oppervlakte) > Oppervlakte.GetValue(items[j].Oppervlakte) + }, - "isadult": func(i, j int) bool { return items[i].Isadult < items[j].Isadult }, - "-isadult": func(i, j int) bool { return items[i].Isadult > items[j].Isadult }, + "woningequivalent": func(i, j int) bool { + return Woningequivalent.GetValue(items[i].Woningequivalent) < Woningequivalent.GetValue(items[j].Woningequivalent) + }, + "-woningequivalent": func(i, j int) bool { + return Woningequivalent.GetValue(items[i].Woningequivalent) > Woningequivalent.GetValue(items[j].Woningequivalent) + }, - "startyear": func(i, j int) bool { return items[i].Startyear < items[j].Startyear }, - 
"-startyear": func(i, j int) bool { return items[i].Startyear > items[j].Startyear }, + "woning_type": func(i, j int) bool { + return WoningType.GetValue(items[i].WoningType) < WoningType.GetValue(items[j].WoningType) + }, + "-woning_type": func(i, j int) bool { + return WoningType.GetValue(items[i].WoningType) > WoningType.GetValue(items[j].WoningType) + }, - "endyear": func(i, j int) bool { return items[i].Endyear < items[j].Endyear }, - "-endyear": func(i, j int) bool { return items[i].Endyear > items[j].Endyear }, + "labelscore_voorlopig": func(i, j int) bool { + return LabelscoreVoorlopig.GetValue(items[i].LabelscoreVoorlopig) < LabelscoreVoorlopig.GetValue(items[j].LabelscoreVoorlopig) + }, + "-labelscore_voorlopig": func(i, j int) bool { + return LabelscoreVoorlopig.GetValue(items[i].LabelscoreVoorlopig) > LabelscoreVoorlopig.GetValue(items[j].LabelscoreVoorlopig) + }, - "runtimeminutes": func(i, j int) bool { return items[i].Runtimeminutes < items[j].Runtimeminutes }, - "-runtimeminutes": func(i, j int) bool { return items[i].Runtimeminutes > items[j].Runtimeminutes }, + "labelscore_definitief": func(i, j int) bool { + return LabelscoreDefinitief.GetValue(items[i].LabelscoreDefinitief) < LabelscoreDefinitief.GetValue(items[j].LabelscoreDefinitief) + }, + "-labelscore_definitief": func(i, j int) bool { + return LabelscoreDefinitief.GetValue(items[i].LabelscoreDefinitief) > LabelscoreDefinitief.GetValue(items[j].LabelscoreDefinitief) + }, - "genres": func(i, j int) bool { return items[i].Genres < items[j].Genres }, - "-genres": func(i, j int) bool { return items[i].Genres > items[j].Genres }, + "energieklasse": func(i, j int) bool { + return Energieklasse.GetValue(items[i].Energieklasse) < Energieklasse.GetValue(items[j].Energieklasse) + }, + "-energieklasse": func(i, j int) bool { + return Energieklasse.GetValue(items[i].Energieklasse) > Energieklasse.GetValue(items[j].Energieklasse) + }, + + "gemeentecode": func(i, j int) bool { + return Gemeentecode.GetValue(items[i].Gemeentecode) < Gemeentecode.GetValue(items[j].Gemeentecode) + }, + "-gemeentecode": func(i, j int) bool { + return Gemeentecode.GetValue(items[i].Gemeentecode) > Gemeentecode.GetValue(items[j].Gemeentecode) + }, + + "gemeentenaam": func(i, j int) bool { + return Gemeentenaam.GetValue(items[i].Gemeentenaam) < Gemeentenaam.GetValue(items[j].Gemeentenaam) + }, + "-gemeentenaam": func(i, j int) bool { + return Gemeentenaam.GetValue(items[i].Gemeentenaam) > Gemeentenaam.GetValue(items[j].Gemeentenaam) + }, + + "buurtcode": func(i, j int) bool { + return Buurtcode.GetValue(items[i].Buurtcode) < Buurtcode.GetValue(items[j].Buurtcode) + }, + "-buurtcode": func(i, j int) bool { + return Buurtcode.GetValue(items[i].Buurtcode) > Buurtcode.GetValue(items[j].Buurtcode) + }, + + "buurtnaam": func(i, j int) bool { + return Buurtnaam.GetValue(items[i].Buurtnaam) < Buurtnaam.GetValue(items[j].Buurtnaam) + }, + "-buurtnaam": func(i, j int) bool { + return Buurtnaam.GetValue(items[i].Buurtnaam) > Buurtnaam.GetValue(items[j].Buurtnaam) + }, + + "wijkcode": func(i, j int) bool { + return Wijkcode.GetValue(items[i].Wijkcode) < Wijkcode.GetValue(items[j].Wijkcode) + }, + "-wijkcode": func(i, j int) bool { + return Wijkcode.GetValue(items[i].Wijkcode) > Wijkcode.GetValue(items[j].Wijkcode) + }, + + "wijknaam": func(i, j int) bool { + return Wijknaam.GetValue(items[i].Wijknaam) < Wijknaam.GetValue(items[j].Wijknaam) + }, + "-wijknaam": func(i, j int) bool { + return Wijknaam.GetValue(items[i].Wijknaam) > Wijknaam.GetValue(items[j].Wijknaam) 
+ }, + + "provinciecode": func(i, j int) bool { + return Provinciecode.GetValue(items[i].Provinciecode) < Provinciecode.GetValue(items[j].Provinciecode) + }, + "-provinciecode": func(i, j int) bool { + return Provinciecode.GetValue(items[i].Provinciecode) > Provinciecode.GetValue(items[j].Provinciecode) + }, + + "provincienaam": func(i, j int) bool { + return Provincienaam.GetValue(items[i].Provincienaam) < Provincienaam.GetValue(items[j].Provincienaam) + }, + "-provincienaam": func(i, j int) bool { + return Provincienaam.GetValue(items[i].Provincienaam) > Provincienaam.GetValue(items[j].Provincienaam) + }, + + "point": func(i, j int) bool { return items[i].Point < items[j].Point }, + "-point": func(i, j int) bool { return items[i].Point > items[j].Point }, + + "pand_gas_ean_aansluitingen": func(i, j int) bool { + return PandGasEanAansluitingen.GetValue(items[i].PandGasEanAansluitingen) < PandGasEanAansluitingen.GetValue(items[j].PandGasEanAansluitingen) + }, + "-pand_gas_ean_aansluitingen": func(i, j int) bool { + return PandGasEanAansluitingen.GetValue(items[i].PandGasEanAansluitingen) > PandGasEanAansluitingen.GetValue(items[j].PandGasEanAansluitingen) + }, + + "group_id_2020": func(i, j int) bool { return items[i].GroupId2020 < items[j].GroupId2020 }, + "-group_id_2020": func(i, j int) bool { return items[i].GroupId2020 > items[j].GroupId2020 }, + + "p6_gas_aansluitingen_2020": func(i, j int) bool { + return P6GasAansluitingen2020.GetValue(items[i].P6GasAansluitingen2020) < P6GasAansluitingen2020.GetValue(items[j].P6GasAansluitingen2020) + }, + "-p6_gas_aansluitingen_2020": func(i, j int) bool { + return P6GasAansluitingen2020.GetValue(items[i].P6GasAansluitingen2020) > P6GasAansluitingen2020.GetValue(items[j].P6GasAansluitingen2020) + }, + + "p6_gasm3_2020": func(i, j int) bool { + return P6Gasm32020.GetValue(items[i].P6Gasm32020) < P6Gasm32020.GetValue(items[j].P6Gasm32020) + }, + "-p6_gasm3_2020": func(i, j int) bool { + return P6Gasm32020.GetValue(items[i].P6Gasm32020) > P6Gasm32020.GetValue(items[j].P6Gasm32020) + }, + + "p6_kwh_2020": func(i, j int) bool { + return P6Kwh2020.GetValue(items[i].P6Kwh2020) < P6Kwh2020.GetValue(items[j].P6Kwh2020) + }, + "-p6_kwh_2020": func(i, j int) bool { + return P6Kwh2020.GetValue(items[i].P6Kwh2020) > P6Kwh2020.GetValue(items[j].P6Kwh2020) + }, + + "p6_totaal_pandoppervlak_m2": func(i, j int) bool { + return P6TotaalPandoppervlakM2.GetValue(items[i].P6TotaalPandoppervlakM2) < P6TotaalPandoppervlakM2.GetValue(items[j].P6TotaalPandoppervlakM2) + }, + "-p6_totaal_pandoppervlak_m2": func(i, j int) bool { + return P6TotaalPandoppervlakM2.GetValue(items[i].P6TotaalPandoppervlakM2) > P6TotaalPandoppervlakM2.GetValue(items[j].P6TotaalPandoppervlakM2) + }, + + "pand_bouwjaar": func(i, j int) bool { + return PandBouwjaar.GetValue(items[i].PandBouwjaar) < PandBouwjaar.GetValue(items[j].PandBouwjaar) + }, + "-pand_bouwjaar": func(i, j int) bool { + return PandBouwjaar.GetValue(items[i].PandBouwjaar) > PandBouwjaar.GetValue(items[j].PandBouwjaar) + }, + + "pand_gas_aansluitingen": func(i, j int) bool { + return PandGasAansluitingen.GetValue(items[i].PandGasAansluitingen) < PandGasAansluitingen.GetValue(items[j].PandGasAansluitingen) + }, + "-pand_gas_aansluitingen": func(i, j int) bool { + return PandGasAansluitingen.GetValue(items[i].PandGasAansluitingen) > PandGasAansluitingen.GetValue(items[j].PandGasAansluitingen) + }, + + "gebruiksdoelen": func(i, j int) bool { + return Gebruiksdoelen.GetArrayValue(items[i].Gebruiksdoelen) < 
Gebruiksdoelen.GetArrayValue(items[j].Gebruiksdoelen) + }, + "-gebruiksdoelen": func(i, j int) bool { + return Gebruiksdoelen.GetArrayValue(items[i].Gebruiksdoelen) > Gebruiksdoelen.GetArrayValue(items[j].Gebruiksdoelen) + }, } + return sortFuncs +} + +func sortBy(items Items, sortingL []string) (Items, []string) { + sortFuncs := createSort(items) + for _, sortFuncName := range sortingL { - sortFunc := sortFuncs[sortFuncName] - sort.Slice(items, sortFunc) + sortFunc, ok := sortFuncs[sortFuncName] + if ok { + sort.Slice(items, sortFunc) + } } + // TODO must be nicer way keys := []string{} for key := range sortFuncs { diff --git a/model_maps.go b/model_maps.go new file mode 100644 index 0000000..4097cc7 --- /dev/null +++ b/model_maps.go @@ -0,0 +1,212 @@ +/* + Transforming ItemsIn -> Items -> ItemsOut + Where Items has column values ar integers to save memmory + maps are needed to restore integers back to the actual values. + those are generated and stored here. +*/ + +package main + +type ModelMaps struct { + Pid MappedColumn + Vid MappedColumn + Straat MappedColumn + Postcode MappedColumn + Huisnummer MappedColumn + Huisletter MappedColumn + Huisnummertoevoeging MappedColumn + Oppervlakte MappedColumn + Woningequivalent MappedColumn + WoningType MappedColumn + LabelscoreVoorlopig MappedColumn + LabelscoreDefinitief MappedColumn + Energieklasse MappedColumn + Gemeentecode MappedColumn + Gemeentenaam MappedColumn + Buurtcode MappedColumn + Buurtnaam MappedColumn + Wijkcode MappedColumn + Wijknaam MappedColumn + Provinciecode MappedColumn + Provincienaam MappedColumn + PandGasEanAansluitingen MappedColumn + P6GasAansluitingen2020 MappedColumn + P6Gasm32020 MappedColumn + P6Kwh2020 MappedColumn + P6TotaalPandoppervlakM2 MappedColumn + PandBouwjaar MappedColumn + PandGasAansluitingen MappedColumn + Gebruiksdoelen MappedColumn +} + +var BitArrays map[string]fieldBitarrayMap + +var Pid MappedColumn +var Vid MappedColumn +var Straat MappedColumn +var Postcode MappedColumn +var Huisnummer MappedColumn +var Huisletter MappedColumn +var Huisnummertoevoeging MappedColumn +var Oppervlakte MappedColumn +var Woningequivalent MappedColumn +var WoningType MappedColumn +var LabelscoreVoorlopig MappedColumn +var LabelscoreDefinitief MappedColumn +var Energieklasse MappedColumn +var Gemeentecode MappedColumn +var Gemeentenaam MappedColumn +var Buurtcode MappedColumn +var Buurtnaam MappedColumn +var Wijkcode MappedColumn +var Wijknaam MappedColumn +var Provinciecode MappedColumn +var Provincienaam MappedColumn +var PandGasEanAansluitingen MappedColumn +var P6GasAansluitingen2020 MappedColumn +var P6Gasm32020 MappedColumn +var P6Kwh2020 MappedColumn +var P6TotaalPandoppervlakM2 MappedColumn +var PandBouwjaar MappedColumn +var PandGasAansluitingen MappedColumn +var Gebruiksdoelen MappedColumn + +func clearBitArrays() { + BitArrays = make(map[string]fieldBitarrayMap) +} + +func init() { + clearBitArrays() + setUpRepeatedColumns() +} + +func setUpRepeatedColumns() { + Pid = NewReapeatedColumn("pid") + Vid = NewReapeatedColumn("vid") + Straat = NewReapeatedColumn("straat") + Postcode = NewReapeatedColumn("postcode") + Huisnummer = NewReapeatedColumn("huisnummer") + Huisletter = NewReapeatedColumn("huisletter") + Huisnummertoevoeging = NewReapeatedColumn("huisnummertoevoeging") + Oppervlakte = NewReapeatedColumn("oppervlakte") + Woningequivalent = NewReapeatedColumn("woningequivalent") + WoningType = NewReapeatedColumn("woning_type") + LabelscoreVoorlopig = NewReapeatedColumn("labelscore_voorlopig") + 
LabelscoreDefinitief = NewReapeatedColumn("labelscore_definitief") + Energieklasse = NewReapeatedColumn("energieklasse") + Gemeentecode = NewReapeatedColumn("gemeentecode") + Gemeentenaam = NewReapeatedColumn("gemeentenaam") + Buurtcode = NewReapeatedColumn("buurtcode") + Buurtnaam = NewReapeatedColumn("buurtnaam") + Wijkcode = NewReapeatedColumn("wijkcode") + Wijknaam = NewReapeatedColumn("wijknaam") + Provinciecode = NewReapeatedColumn("provinciecode") + Provincienaam = NewReapeatedColumn("provincienaam") + PandGasEanAansluitingen = NewReapeatedColumn("pand_gas_ean_aansluitingen") + P6GasAansluitingen2020 = NewReapeatedColumn("p6_gas_aansluitingen_2020") + P6Gasm32020 = NewReapeatedColumn("p6_gasm3_2020") + P6Kwh2020 = NewReapeatedColumn("p6_kwh_2020") + P6TotaalPandoppervlakM2 = NewReapeatedColumn("p6_totaal_pandoppervlak_m2") + PandBouwjaar = NewReapeatedColumn("pand_bouwjaar") + PandGasAansluitingen = NewReapeatedColumn("pand_gas_aansluitingen") + Gebruiksdoelen = NewReapeatedColumn("gebruiksdoelen") + +} + +func CreateMapstore() ModelMaps { + return ModelMaps{ + Pid, + Vid, + Straat, + Postcode, + Huisnummer, + Huisletter, + Huisnummertoevoeging, + Oppervlakte, + Woningequivalent, + WoningType, + LabelscoreVoorlopig, + LabelscoreDefinitief, + Energieklasse, + Gemeentecode, + Gemeentenaam, + Buurtcode, + Buurtnaam, + Wijkcode, + Wijknaam, + Provinciecode, + Provincienaam, + PandGasEanAansluitingen, + P6GasAansluitingen2020, + P6Gasm32020, + P6Kwh2020, + P6TotaalPandoppervlakM2, + PandBouwjaar, + PandGasAansluitingen, + Gebruiksdoelen, + } +} + +func LoadMapstore(m ModelMaps) { + + Pid = m.Pid + Vid = m.Vid + Straat = m.Straat + Postcode = m.Postcode + Huisnummer = m.Huisnummer + Huisletter = m.Huisletter + Huisnummertoevoeging = m.Huisnummertoevoeging + Oppervlakte = m.Oppervlakte + Woningequivalent = m.Woningequivalent + WoningType = m.WoningType + LabelscoreVoorlopig = m.LabelscoreVoorlopig + LabelscoreDefinitief = m.LabelscoreDefinitief + Energieklasse = m.Energieklasse + Gemeentecode = m.Gemeentecode + Gemeentenaam = m.Gemeentenaam + Buurtcode = m.Buurtcode + Buurtnaam = m.Buurtnaam + Wijkcode = m.Wijkcode + Wijknaam = m.Wijknaam + Provinciecode = m.Provinciecode + Provincienaam = m.Provincienaam + PandGasEanAansluitingen = m.PandGasEanAansluitingen + P6GasAansluitingen2020 = m.P6GasAansluitingen2020 + P6Gasm32020 = m.P6Gasm32020 + P6Kwh2020 = m.P6Kwh2020 + P6TotaalPandoppervlakM2 = m.P6TotaalPandoppervlakM2 + PandBouwjaar = m.PandBouwjaar + PandGasAansluitingen = m.PandGasAansluitingen + Gebruiksdoelen = m.Gebruiksdoelen + + // register the columns + RegisteredColumns[Pid.Name] = Pid + RegisteredColumns[Vid.Name] = Vid + RegisteredColumns[Straat.Name] = Straat + RegisteredColumns[Postcode.Name] = Postcode + RegisteredColumns[Huisnummer.Name] = Huisnummer + RegisteredColumns[Huisletter.Name] = Huisletter + RegisteredColumns[Huisnummertoevoeging.Name] = Huisnummertoevoeging + RegisteredColumns[Oppervlakte.Name] = Oppervlakte + RegisteredColumns[Woningequivalent.Name] = Woningequivalent + RegisteredColumns[WoningType.Name] = WoningType + RegisteredColumns[LabelscoreVoorlopig.Name] = LabelscoreVoorlopig + RegisteredColumns[LabelscoreDefinitief.Name] = LabelscoreDefinitief + RegisteredColumns[Energieklasse.Name] = Energieklasse + RegisteredColumns[Gemeentecode.Name] = Gemeentecode + RegisteredColumns[Gemeentenaam.Name] = Gemeentenaam + RegisteredColumns[Buurtcode.Name] = Buurtcode + RegisteredColumns[Buurtnaam.Name] = Buurtnaam + RegisteredColumns[Wijkcode.Name] = Wijkcode + 
RegisteredColumns[Wijknaam.Name] = Wijknaam + RegisteredColumns[Provinciecode.Name] = Provinciecode + RegisteredColumns[Provincienaam.Name] = Provincienaam + RegisteredColumns[PandGasEanAansluitingen.Name] = PandGasEanAansluitingen + RegisteredColumns[P6GasAansluitingen2020.Name] = P6GasAansluitingen2020 + RegisteredColumns[P6Gasm32020.Name] = P6Gasm32020 + RegisteredColumns[P6Kwh2020.Name] = P6Kwh2020 + RegisteredColumns[P6TotaalPandoppervlakM2.Name] = P6TotaalPandoppervlakM2 + RegisteredColumns[PandBouwjaar.Name] = PandBouwjaar + RegisteredColumns[PandGasAansluitingen.Name] = PandGasAansluitingen + RegisteredColumns[Gebruiksdoelen.Name] = Gebruiksdoelen +} diff --git a/operations.go b/operations.go index 423d1cb..ba74d4d 100644 --- a/operations.go +++ b/operations.go @@ -4,17 +4,50 @@ import ( "encoding/json" "fmt" "net/http" - "net/url" + + // "reflect" + "errors" + "log" + "sort" + + //"sort" "strconv" "strings" "time" + + "github.com/Workiva/go-datastructures/bitarray" + "github.com/go-spatial/geom" + "github.com/go-spatial/geom/encoding/geojson" ) +type filterFuncc func(*Item, string) bool +type registerFuncType map[string]filterFuncc + +type bitsetFuncc func(string) bitarray.BitArray +type registerBitSetType map[string]bitsetFuncc + +type filterType map[string][]string + +func (ft filterType) CacheKey() string { + filterlist := []string{} + for k, v := range ft { + filterlist = append(filterlist, fmt.Sprintf("%s=%s", k, v)) + } + sort.Strings(filterlist) + return strings.Join(filterlist, "-") +} + +type formatRespFunc func(w http.ResponseWriter, r *http.Request, items Items) +type registerFormatMap map[string]formatRespFunc + type Query struct { Filters filterType Excludes filterType Anys filterType + GroupBy string + Reduce string + Limit int LimitGiven bool Page int @@ -28,6 +61,9 @@ type Query struct { IndexQuery string IndexGiven bool + Geometry geom.Geometry + GeometryGiven bool + ReturnFormat string } @@ -35,65 +71,121 @@ func (q Query) EarlyExit() bool { return q.LimitGiven && !q.PageGiven && !q.SortByGiven } -func decodeUrl(s string) string { - newS, err := url.QueryUnescape(s) - if err != nil { - fmt.Println("oh no error", err) - return s +// return cachable key for query +func (q Query) CacheKey() (string, error) { + + if SETTINGS.Get("groupbycache") != "yes" { + return "", errors.New("cache disabled") + } + + if q.EarlyExit() { + return "", errors.New("not cached") + } + if q.GeometryGiven { + return "", errors.New("geo not cached") } - return newS + + for k := range RegisterBitArray { + _, filterFound := q.Filters[k] + if filterFound { + return "", errors.New("bitarrays filters do not need to be cached") + } + } + + if q.EarlyExit() { + return "", errors.New("not cached") + } + + keys := []string{ + q.Filters.CacheKey(), + q.Excludes.CacheKey(), + q.Anys.CacheKey(), + q.GroupBy, + q.Reduce, + q.ReturnFormat, + } + + return strings.Join(keys, "-"), nil + } -// util for api -func parseURLParameters(r *http.Request) Query { +// parseURLParameters checks parameters and builds a query to be run. +func parseURLParameters(r *http.Request) (Query, error) { filterMap := make(filterType) excludeMap := make(filterType) anyMap := make(filterType) - //TODO change query to be based on input. 
+ groupBy := "" + reduce := "" - urlItems := r.URL.Query() + // parse params and body posts // (geo)json data + r.ParseForm() + + if SETTINGS.Get("debug") == "yes" { + for key, value := range r.Form { + fmt.Printf("F %s = %s\n", key, value) + } + } for k := range RegisterFuncMap { - parameter, parameterFound := urlItems[k] + parameter, parameterFound := r.Form[k] if parameterFound && parameter[0] != "" { - newSl := make([]string, len(parameter)) - for i, v := range parameter { - newSl[i] = decodeUrl(v) - } - //filterMap[k] = parameter - filterMap[k] = newSl + filterMap[k] = parameter } - parameter, parameterFound = urlItems["!"+k] + parameter, parameterFound = r.Form["!"+k] if parameterFound && parameter[0] != "" { excludeMap[k] = parameter } - parameter, parameterFound = urlItems["any_"+k] + parameter, parameterFound = r.Form["any_"+k] if parameterFound && parameter[0] != "" { anyMap[k] = parameter } } + // Check and validate groupby parameter + parameter, found := r.Form["groupby"] + + if found && parameter[0] != "" { + _, funcFound1 := RegisterGroupBy[parameter[0]] + _, funcFound2 := RegisterGroupByCustom[parameter[0]] + + if !funcFound1 && !funcFound2 { + return Query{}, errors.New("invalid groupby parameter") + } + groupBy = parameter[0] + } + + // Check and validate reduce parameter + parameter, found = r.Form["reduce"] + + if found && parameter[0] != "" { + _, funcFound := RegisterReduce[parameter[0]] + if !funcFound { + return Query{}, errors.New("Invalid reduce parameter") + } + reduce = parameter[0] + } + // TODO there must be better way page := 1 - pageStr, pageGiven := urlItems["page"] + pageStr, pageGiven := r.Form["page"] if pageGiven { page = intMoreDefault(pageStr[0], 1) } pageSize := 10 - pageSizeStr, pageSizeGiven := urlItems["pagesize"] + pageSizeStr, pageSizeGiven := r.Form["pagesize"] if pageSizeGiven { pageSize = intMoreDefault(pageSizeStr[0], 1) } limit := 0 - limitStr, limitGiven := urlItems["limit"] + limitStr, limitGiven := r.Form["limit"] if limitGiven { limit = intMoreDefault(limitStr[0], 1) } format := "json" - formatStr, formatGiven := urlItems["format"] + formatStr, formatGiven := r.Form["format"] if formatGiven { if formatStr[0] == "csv" { @@ -101,20 +193,37 @@ func parseURLParameters(r *http.Request) Query { } } - sortingL, sortingGiven := urlItems["sortby"] + sortingL, sortingGiven := r.Form["sortby"] index := "" - indexL, indexGiven := urlItems["search"] - indexGiven = indexGiven && (SETTINGS.Get("indexed") == "y") - indexUsed := indexGiven && len(indexL[0]) > 2 + indexL, indexGiven := r.Form["search"] + indexUsed := indexGiven && indexL[0] != "" + if indexUsed { - index = strings.ToLower(indexL[0]) + index = indexL[0] + } + + // check for geojson geometry stuff. 
+ geometryS, geometryGiven := r.Form["geojson"] + var geoinput geojson.Geometry + if geometryGiven && geometryS[0] != "" { + err := json.Unmarshal([]byte(geometryS[0]), &geoinput) + if err != nil { + log.Println("parsing geojson error") + log.Println(err) + geometryGiven = false + return Query{}, errors.New("failed to parse geojson") + } } + return Query{ Filters: filterMap, Excludes: excludeMap, Anys: anyMap, + GroupBy: groupBy, + Reduce: reduce, + Limit: limit, LimitGiven: limitGiven, @@ -129,19 +238,31 @@ func parseURLParameters(r *http.Request) Query { IndexQuery: index, IndexGiven: indexUsed, + Geometry: geoinput.Geometry, + + GeometryGiven: geometryGiven, + ReturnFormat: format, - } + }, nil } -func groupByRunner(items Items, groubByParameter string) ItemsGroupedBy { +func groupByRunner(items Items, groupByParameter string) ItemsGroupedBy { grouping := make(ItemsGroupedBy) - groupingFunc := RegisterGroupBy[groubByParameter] - if groupingFunc == nil { + groupingFunc := RegisterGroupBy[groupByParameter] + + customGrouping := RegisterGroupByCustom[groupByParameter] + + if groupingFunc == nil && customGrouping == nil { return grouping } + for _, item := range items { - GroupingKey := groupingFunc(item) - grouping[GroupingKey] = append(grouping[GroupingKey], item) + if customGrouping == nil { + GroupingKey := groupingFunc(item) + grouping[GroupingKey] = append(grouping[GroupingKey], item) + } else { + customGrouping(item, grouping) + } } return grouping } @@ -211,9 +332,10 @@ func max(a, b int) int { return b } -func filteredEarlyExit(items Items, operations GroupedOperations, query Query) Items { +func filteredEarlyExit(items *Items, operations GroupedOperations, query Query) Items { + registerFuncs := operations.Funcs - filteredItems := make(Items, 0, len(items)/4) + filteredItems := make(Items, 0, len(*items)/4) excludes := query.Excludes filters := query.Filters anys := query.Anys @@ -222,12 +344,12 @@ func filteredEarlyExit(items Items, operations GroupedOperations, query Query) I start := (query.Page - 1) * query.PageSize end := start + query.PageSize stop := end + if query.LimitGiven { stop = limit } - //TODO candidate for speedup - for _, item := range items { + for _, item := range *items { if !any(item, anys, registerFuncs) { continue } @@ -243,10 +365,11 @@ func filteredEarlyExit(items Items, operations GroupedOperations, query Query) I break } } + return filteredItems } -func filteredEarlyExitSingle(items Items, column string, operations GroupedOperations, query Query) []string { +func filteredEarlyExitSingle(items *Items, column string, operations GroupedOperations, query Query) []string { registerFuncs := operations.Funcs filteredItemsSet := make(map[string]bool) excludes := query.Excludes @@ -257,12 +380,13 @@ func filteredEarlyExitSingle(items Items, column string, operations GroupedOpera start := (query.Page - 1) * query.PageSize end := start + query.PageSize stop := end + if query.LimitGiven { stop = limit } - //TODO candidate for speedup - for _, item := range items { + for _, item := range *items { + if !any(item, anys, registerFuncs) { continue } @@ -272,14 +396,24 @@ func filteredEarlyExitSingle(items Items, column string, operations GroupedOpera if !exclude(item, excludes, registerFuncs) { continue } - single := operations.Getters[column](item) - filteredItemsSet[single] = true + + // return single example value for search field + if f, ok := operations.Getters[column]; ok { + single := f(item) + filteredItemsSet[single] = true + } else { + 
fmt.Print(operations.Getters) + fmt.Println(column) + fmt.Println("missing getter?") + } if len(filteredItemsSet) == stop { break } } + results := []string{} + for k := range filteredItemsSet { // empty keys crashes frontend. // should be fixed in frontend then below can go. @@ -291,80 +425,137 @@ func filteredEarlyExitSingle(items Items, column string, operations GroupedOpera return results } -func runQuery(items Items, query Query, operations GroupedOperations) (Items, int64) { - start := time.Now() - var newItems Items +// BitArray Filter. +// for columns with not so unique values it makes sense to create bitarrays. +// to enable fast bitwise selection operations. +func bitArrayFilter( + items *Items, + query Query) (Items, error) { - //TODO this still needs a cleanup, but it's currently the solution to solve column and the indexes + combinedBitArrays := make([]bitarray.BitArray, 0) - //if query.IndexGiven && len(STR_INDEX) > 0 { - // items = make(Items, 0) - // indices := INDEX.Lookup([]byte(query.IndexQuery), -1) - // seen := make(map[string]bool) - // for _, idx := range indices { - // key := getStringFromIndex(STR_INDEX, idx) - // if !seen[key] { - // seen[key] = true - // for _, item := range LOOKUP[key] { - // items = append(items, item) - // } - // } + for k := range BitArrays { + parameter, foundkey := query.Filters["match-"+k] + + if len(parameter) == 0 { + continue + } + + if !foundkey { + continue + } + + ba, err := GetBitArray(k, parameter[0]) + + if err != nil { + fmt.Println(err) + continue + } + combinedBitArrays = append(combinedBitArrays, ba) - // } - //} - if query.IndexGiven { - items = runIndexQuery(query) } - if query.EarlyExit() { - newItems = filteredEarlyExit(items, operations, query) + var bitArrayResult bitarray.BitArray + + if len(combinedBitArrays) > 0 { + bitArrayResult = combinedBitArrays[0] } else { - newItems = filtered(items, operations, query) + log.Println("no bitarrays found / used") + return nil, errors.New("no bitarray found") } - diff := time.Since(start) - return newItems, int64(diff) / int64(1000000) + + // combine AND bitarrays + if len(combinedBitArrays) > 1 { + for _, other := range combinedBitArrays[1:] { + bitArrayResult = bitArrayResult.And(other) + } + } + + // TODO OR + // TODO EXCLUDE + + if bitArrayResult == nil { + log.Fatal("something went wrong with bitarray..") + } + + newItems := make(Items, 0) + labels := bitArrayResult.ToNums() + + for _, l := range labels { + newItems = append(newItems, (*items)[int(l)]) + } + + return newItems, nil } -func runTypeAheadQuery(items Items, column string, query Query, operations GroupedOperations) ([]string, int64) { +func runQuery(items *Items, query Query, operations GroupedOperations) (Items, int64) { start := time.Now() - if query.IndexGiven { - items = runIndexQuery(query) + + if query.GeometryGiven { + cu := CoverDefault(query.Geometry) + if len(cu) == 0 { + log.Println("covering cell union not created") + } else { + geoitems := SearchGeoItems(cu) + items = &geoitems + // log.Printf("geo matched %d \n", len(geoitems)) + } } - results := filteredEarlyExitSingle(items, column, operations, query) - diff := time.Since(start) - return results, int64(diff) / int64(1000000) -} -func runIndexQuery(query Query) Items { - items := make(Items, 0) - indices := INDEX.Lookup([]byte(query.IndexQuery), -1) - seen := make(map[string]bool) - added := make(map[int]bool) - for _, idx := range indices { - key := getStringFromIndex(STR_INDEX, idx) - if !seen[key] { - seen[key] = true - for _, index := 
range LOOKUPINDEX[key] { - if _, ok := added[index]; !ok { - added[index] = true - items = append(items, ITEMS[index]) - } + var nextItems *Items + filteredItems, err := bitArrayFilter(items, query) + if err != nil { + nextItems = items + } else { + nextItems = &filteredItems + } + + if query.IndexGiven && len(STR_INDEX) > 0 { + items := make(Items, 0) + indices := INDEX.Lookup([]byte(query.IndexQuery), -1) + seen := make(map[string]bool) + for _, idx := range indices { + key := getStringFromIndex(STR_INDEX, idx) + if !seen[key] { + seen[key] = true + items = append(items, LOOKUP[key]...) } } + } + var newItems Items + + if query.EarlyExit() { + newItems = filteredEarlyExit(nextItems, operations, query) + } else { + newItems = filtered(nextItems, operations, query) } - return items + + diff := time.Since(start) + + log.Printf("items matched %d \n", len(newItems)) + + return newItems, int64(diff) / int64(1000000) } -func filtered(items Items, operations GroupedOperations, query Query) Items { +func runTypeAheadQuery( + items *Items, column string, query Query, + operations GroupedOperations) ([]string, int64) { + start := time.Now() + results := filteredEarlyExitSingle(items, column, operations, query) + diff := time.Since(start) + return results, int64(diff) / int64(1000000) +} + +func filtered(items *Items, operations GroupedOperations, query Query) Items { registerFuncs := operations.Funcs + filteredItems := make(Items, 0) excludes := query.Excludes filters := query.Filters anys := query.Anys - filteredItems := make(Items, 0) - for _, item := range items { + for _, item := range *items { if !any(item, anys, registerFuncs) { continue } @@ -390,28 +581,20 @@ func mapIndex(items Items, indexes []int) Items { type HeaderData map[string]string func getHeaderData(items Items, query Query, queryDuration int64) HeaderData { - headerData := make(HeaderData) - - if query.LimitGiven { - headerData["Limit"] = strconv.Itoa(query.Limit) - } - - if query.PageGiven { - headerData["Page"] = strconv.Itoa(query.Page) - headerData["Page-Size"] = strconv.Itoa(query.PageSize) - headerData["Total-Pages"] = strconv.Itoa((len(items) / query.PageSize) + 1) - } - - headerData["Total-Items"] = strconv.Itoa(len(items)) - headerData["Query-Duration"] = strconv.FormatInt(queryDuration, 10) + "ms" - bytesQuery, _ := json.Marshal(query) - headerData["query"] = string(bytesQuery) - + matched := int64(len(items)) + headerData := getHeaderDataShared(query, queryDuration, matched) return headerData } //getHeaderDataSlice extract from header information with data slice we want func getHeaderDataSlice(items []string, query Query, queryDuration int64) HeaderData { + matched := int64(len(items)) + headerData := getHeaderDataShared(query, queryDuration, matched) + return headerData +} + +func getHeaderDataShared(query Query, queryDuration int64, matched int64) HeaderData { + headerData := make(HeaderData) if query.LimitGiven { @@ -421,10 +604,11 @@ func getHeaderDataSlice(items []string, query Query, queryDuration int64) Header if query.PageGiven { headerData["Page"] = strconv.Itoa(query.Page) headerData["Page-Size"] = strconv.Itoa(query.PageSize) - headerData["Total-Pages"] = strconv.Itoa((len(items) / query.PageSize) + 1) + headerData["Total-Pages"] = strconv.Itoa(int(matched)/query.PageSize + 1) } - headerData["Total-Items"] = strconv.Itoa(len(items)) + headerData["Total-Items"] = strconv.FormatInt(matched, 10) + headerData["Cache-Control"] = "public, max-age=300" headerData["Query-Duration"] = 
strconv.FormatInt(queryDuration, 10) + "ms" bytesQuery, _ := json.Marshal(query) headerData["query"] = string(bytesQuery) @@ -456,7 +640,6 @@ func sortLimit(items Items, query Query) Items { } // Note the slice built on array, slicing a slice larger then the the slice adds array items - // https://play.golang.org/p/GxhbBGNaXwL if len(items) < query.Limit { return items } diff --git a/parse_pg_array.go b/parse_pg_array.go new file mode 100644 index 0000000..42f6095 --- /dev/null +++ b/parse_pg_array.go @@ -0,0 +1,49 @@ +package main + +import ( + "bytes" + "errors" +) + +func ParsePGArray(array string) ([]string, error) { + var out []string + var arrayOpened, quoteOpened, escapeOpened bool + item := &bytes.Buffer{} + for _, r := range array { + switch { + case !arrayOpened: + if r != '{' { + return nil, errors.New("Doesn't appear to be a postgres array. Doesn't start with an opening curly brace.") + } + arrayOpened = true + case escapeOpened: + item.WriteRune(r) + escapeOpened = false + case quoteOpened: + switch r { + case '\\': + escapeOpened = true + case '"': + quoteOpened = false + if item.String() == "NULL" { + item.Reset() + } + default: + item.WriteRune(r) + } + case r == '}': + // done + out = append(out, item.String()) + return out, nil + case r == '"': + quoteOpened = true + case r == ',': + // end of item + out = append(out, item.String()) + item.Reset() + default: + item.WriteRune(r) + } + } + return nil, errors.New("Doesn't appear to be a postgres array. Premature end of string.") +} diff --git a/parse_pg_array_test.go b/parse_pg_array_test.go new file mode 100644 index 0000000..d0a8ca0 --- /dev/null +++ b/parse_pg_array_test.go @@ -0,0 +1,37 @@ +package main + +import ( + "reflect" + "testing" +) + +func TestParseArray(t *testing.T) { + scanTests := []struct { + in string + out []string + }{ + {"{one,two}", []string{"one", "two"}}, + {`{"one, sdf",two}`, []string{"one, sdf", "two"}}, + {`{"\"one\"",two}`, []string{`"one"`, "two"}}, + {`{"\\one\\",two}`, []string{`\one\`, "two"}}, + {`{"{one}",two}`, []string{`{one}`, "two"}}, + {`{"one two"}`, []string{`one two`}}, + {`{"one,two"}`, []string{`one,two`}}, + {`{abcdef:83bf98cc-fec9-4e77-b4cf-99f9fb6655fa-0NH:zxcvzxc:wers:vxdfw-asdf-asdf}`, []string{"abcdef:83bf98cc-fec9-4e77-b4cf-99f9fb6655fa-0NH:zxcvzxc:wers:vxdfw-asdf-asdf"}}, + {`{"",two}`, []string{"", "two"}}, + {`{" ","NULL"}`, []string{" ", ""}}, + } + + for tcNumber, testcase := range scanTests { + result, err := ParsePGArray(testcase.in) + if err != nil { + t.Error("testcase", tcNumber, "gave error") + } + if len(result) == 0 { + t.Error("testcase", tcNumber, "expected", "found", "!=", "not found") + } + if !reflect.DeepEqual(result, testcase.out) { + t.Error("testcase", tcNumber, "expected", testcase.out, "!=", result) + } + } +} diff --git a/storage_operations.go b/storage_operations.go index 0a84c2c..e142f56 100644 --- a/storage_operations.go +++ b/storage_operations.go @@ -12,6 +12,17 @@ import ( "time" ) +type Store struct { + Items Items + Maps ModelMaps +} + +type storageFunc func(string) (int64, error) +type retrieveFunc func(string) (int, error) + +type storageFuncs map[string]storageFunc +type retrieveFuncs map[string]retrieveFunc + var STORAGEFUNCS storageFuncs var RETRIEVEFUNCS retrieveFuncs @@ -20,19 +31,18 @@ func init() { STORAGEFUNCS["bytes"] = saveAsBytes // currently default STORAGEFUNCS["bytesz"] = saveAsBytesCompressed STORAGEFUNCS["json"] = saveAsJsonZipped - STORAGEFUNCS["jsonz"] = saveAsJsonZipped RETRIEVEFUNCS = make(retrieveFuncs) 
RETRIEVEFUNCS["bytes"] = loadAsBytes // currently default RETRIEVEFUNCS["bytesz"] = loadAsBytesCompressed RETRIEVEFUNCS["json"] = loadAsJsonZipped - RETRIEVEFUNCS["jsonz"] = loadAsJsonZipped } -func saveAsJsonZipped(items Items, filename string) (int64, error) { +func saveAsJsonZipped(filename string) (int64, error) { + store := makeStore() var b bytes.Buffer writer := gzip.NewWriter(&b) - itemJSON, _ := json.Marshal(ITEMS) + itemJSON, _ := json.Marshal(store) writer.Write(itemJSON) writer.Flush() writer.Close() @@ -49,8 +59,20 @@ func saveAsJsonZipped(items Items, filename string) (int64, error) { return size, nil } -func saveAsBytes(items Items, filename string) (int64, error) { - data := EncodeItems(items) +func makeStore() Store { + return Store{ITEMS, CreateMapstore()} +} + +func restoreStore(store Store) { + ITEMS = store.Items + LoadMapstore(store.Maps) + // rebuild indexes + ITEMS.FillIndexes() +} + +func saveAsBytes(filename string) (int64, error) { + store := makeStore() + data := EncodeItems(store) WriteToFile(data, filename) fi, err := os.Stat(filename) if err != nil { @@ -61,8 +83,9 @@ func saveAsBytes(items Items, filename string) (int64, error) { return size, nil } -func saveAsBytesCompressed(items Items, filename string) (int64, error) { - data := EncodeItems(items) +func saveAsBytesCompressed(filename string) (int64, error) { + store := makeStore() + data := EncodeItems(store) data = Compress(data) WriteToFile(data, filename) fi, err := os.Stat(filename) @@ -74,10 +97,10 @@ func saveAsBytesCompressed(items Items, filename string) (int64, error) { return size, nil } -func EncodeItems(items Items) []byte { +func EncodeItems(s Store) []byte { buf := bytes.Buffer{} enc := gob.NewEncoder(&buf) - err := enc.Encode(items) + err := enc.Encode(s) if err != nil { fmt.Println("error encoding", err) } @@ -103,14 +126,14 @@ func Decompress(s []byte) []byte { return data } -func DecodeToItems(s []byte) Items { - items := make(Items, 0, 100*1000) +func DecodeToStore(s []byte) Store { + store := Store{} decoder := gob.NewDecoder(bytes.NewReader(s)) - err := decoder.Decode(&items) + err := decoder.Decode(&store) if err != nil { - fmt.Println("Unable to DecodeToItems", err) + fmt.Println("Unable to Decode", err) } - return items + return store } func WriteToFile(s []byte, filename string) { @@ -133,23 +156,22 @@ func ReadFromFile(filename string) []byte { return data } -func loadAsBytes(items Items, filename string) (int, error) { +func loadAsBytes(filename string) (int, error) { d := ReadFromFile(filename) - items = DecodeToItems(d) - ITEMS = items - return len(items), nil + store := DecodeToStore(d) + restoreStore(store) + return len(ITEMS), nil } -func loadAsBytesCompressed(items Items, filename string) (int, error) { +func loadAsBytesCompressed(filename string) (int, error) { d := ReadFromFile(filename) d = Decompress(d) - items = DecodeToItems(d) - ITEMS = make(Items, 0, 100*1000) - ITEMS = items - return len(items), nil + store := DecodeToStore(d) + restoreStore(store) + return len(ITEMS), nil } -func loadAsJsonZipped(items Items, filename string) (int, error) { +func loadAsJsonZipped(filename string) (int, error) { fi, err := os.Open(filename) if err != nil { _, err2 := os.Getwd() @@ -166,16 +188,15 @@ func loadAsJsonZipped(items Items, filename string) (int, error) { } defer fz.Close() - // TODO buffered instead of one big chunk s, err := ioutil.ReadAll(fz) if err != nil { return 0, err } - ITEMS = make(Items, 0, 100*1000) - json.Unmarshal(s, &ITEMS) - + store := makeStore() + 
json.Unmarshal(s, &store) + restoreStore(store) // GC friendly s = nil return len(ITEMS), nil @@ -195,7 +216,7 @@ func loadAtStart(storagename string, filename string, indexed bool) { fmt.Printf(WarningColorN, msg) start := time.Now() - itemsAdded, err := retrievefunc(ITEMS, filename) + itemsAdded, err := retrievefunc(filename) if err != nil { log.Fatal(fmt.Sprintf("could not open %s reason %s", filename, err)) } @@ -204,6 +225,7 @@ msg = fmt.Sprint("Loaded in memory amount: ", itemsAdded, " time: ", diff) fmt.Printf(WarningColorN, msg) + /* should be added to FillIndexes if indexed { start = time.Now() msg := fmt.Sprint("Creating index") @@ -213,4 +235,5 @@ msg = fmt.Sprint("Index set time: ", diff) fmt.Printf(WarningColorN, msg) } + */ } diff --git a/storage_operations_test.go b/storage_operations_test.go new file mode 100644 index 0000000..567f186 --- /dev/null +++ b/storage_operations_test.go @@ -0,0 +1,62 @@ +package main + +import ( + "testing" +) + +func TestBytesSaving(t *testing.T) { + + size := len(ITEMS) + + if size != 10 { + t.Errorf("expected 10 ITEMS got %d", size) + } + +} + +func TestBytes(t *testing.T) { + + saveAsBytes("testdata/testbytes") + + RegisteredColumns = make(ColumnRegister) + ITEMS = Items{} + + clearBitArrays() + clearGeoIndex() + + loadAsBytes("testdata/testbytes") + + if len(ITEMS) != 10 { + t.Error("bytes save / load failed") + } + + saveAsBytes("testdata/testbytesz") + ITEMS = Items{} + loadAsBytes("testdata/testbytesz") + if len(ITEMS) != 10 { + t.Error("bytes compressed save / load failed") + } + + if len(BitArrays) == 0 { + t.Error("bitarrays are not restored") + } + + if len(S2CELLS) == 0 { + t.Error("geoindex is not restored") + } + + if len(RegisteredColumns) == 0 { + t.Error("column register is not restored") + } + +} + +func TestJson(t *testing.T) { + + saveAsJsonZipped("testdata/test.json") + ITEMS = Items{} // Clear ITEMS + loadAsJsonZipped("testdata/test.json") + if len(ITEMS) != 10 { + t.Error("json zipped save / load failed") + } +} diff --git a/store.go b/store.go new file mode 100644 index 0000000..ce31e36 --- /dev/null +++ b/store.go @@ -0,0 +1,72 @@ +package main + +import ( + "fmt" + "log" + "runtime" + "sync" + "time" +) + +//Items +type Items []*Item +type ItemsIn []*ItemIn +type ItemsOut []*ItemOut + +type ItemsGroupedBy map[string]Items +type ItemsChannel chan ItemsIn + +var ITEMS Items + +var itemChan ItemsChannel + +// single item map lock when updating new items +var lock = sync.RWMutex{} + +func init() { + ITEMS = Items{} +} + +func ItemChanWorker(itemChan ItemsChannel) { + label := 0 + for items := range itemChan { + lock.Lock() + for _, itm := range items { + if itm != nil { + smallItem := itm.Shrink(label) + smallItem.StoreBitArrayColumns() + ITEMS = append(ITEMS, &smallItem) + if ITEMS[label] != &smallItem { + log.Fatal("storing item index off") + } + smallItem.GeoIndex(label) + label++ + } + } + lock.Unlock() + } +} + +func (items *Items) FillIndexes() { + + start := time.Now() + + lock.Lock() + defer lock.Unlock() + + clearGeoIndex() + clearBitArrays() + + for i := range *items { + ITEMS[i].StoreBitArrayColumns() + ITEMS[i].GeoIndex(ITEMS[i].Label) + } + + S2CELLS.Sort() + + diff := time.Since(start) + msg := fmt.Sprint("Index set time: ", diff) + fmt.Printf(WarningColorN, msg) + // run garbage collection + runtime.GC() +} diff --git a/testdata/README.md new 
file mode 100644 index 0000000..19ed6b1 --- /dev/null +++ b/testdata/README.md @@ -0,0 +1,3 @@ +To create new testdata: + +- curl -O 'http://127.0.0.1:8000/list/?match-postcode=1011AB&startswith-huisnummer=10&sortby=huisnummer&format=csv' diff --git a/testdata/dataselectie_vbo_energie_20210505.head.csv b/testdata/dataselectie_vbo_energie_20210505.head.csv new file mode 100644 index 0000000..0437c02 --- /dev/null +++ b/testdata/dataselectie_vbo_energie_20210505.head.csv @@ -0,0 +1,11 @@ +pid,vid,numid,straat,postcode,huisnummer,huisletter,huisnummertoevoeging,oppervlakte,woningequivalent,woning_type,labelscore_voorlopig,labelscore_definitief,energieklasse,gemeentecode,gemeentenaam,buurtcode,buurtnaam,wijkcode,wijknaam,provinciecode,provincienaam,point,pand_gas_ean_aansluitingen,group_id_2020,p6_gas_aansluitingen_2020,p6_gasm3_2020,p6_kwh_2020,p6_totaal_pandoppervlak_m2,pand_bouwjaar,pand_gas_aansluitingen,gebruiksdoelen +0363100012181960,0363010000785105,0363200000081085,De Ruijterkade,1011AB,105,,H,348,3,Niet wonen,0,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.9052328744478375 52.37780474588784),6,0,0,0,14940,6408,1884,6,{bijeenkomstfunctie} +0363100012181960,0363010000784610,0363200000081087,De Ruijterkade,1011AB,105,,2,72,1,Flat/appartement,7,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.9052328744478375 52.37780474588784),6,0,0,0,14940,6408,1884,6,{woonfunctie} +0363100012181960,0363010000964973,0363200000081086,De Ruijterkade,1011AB,105,,1,174,1,Niet wonen,0,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.9052328744478375 52.37780474588784),6,0,0,0,14940,6408,1884,6,{kantoorfunctie} +0363100012181960,0363010000977861,0363200000081088,De Ruijterkade,1011AB,105,,3,84,1,Flat/appartement,7,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.9052328744478375 52.37780474588784),6,0,0,0,14940,6408,1884,6,{woonfunctie} +0363100012181960,0363010000618793,0363200000081090,De Ruijterkade,1011AB,106,,2,82,1,Flat/appartement,7,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.9052328744478375 52.37780474588784),6,0,0,0,14940,6408,1884,6,{woonfunctie} +0363100012181960,0363010012064067,0363200012064203,De Ruijterkade,1011AB,106,,1,1,0,Niet wonen,0,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.9052328744478375 52.37780474588784),6,0,0,0,14940,6408,1884,6,{kantoorfunctie} +0363100012181960,0363010000618799,0363200000081091,De Ruijterkade,1011AB,106,,3,60,1,Flat/appartement,7,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.9052328744478375 52.37780474588784),6,0,0,0,14940,6408,1884,6,{woonfunctie} +0363100012181958,0363010000618800,0363200000414309,De Ruijterkade,1011AB,107,,H,280,2,Niet wonen,0,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.90541865504259 52.37775355302933),1,0,0,0,14940,6408,1005,1,{overige gebruiksfunctie} +0363100012181958,0363010000977860,0363200000081092,De Ruijterkade,1011AB,107,,,1774,14,Niet wonen,0,0,D,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.90541865504259 52.37775355302933),1,0,0,0,14940,6408,1005,1,{kantoorfunctie} 
+0363100012181958,0363010000618801,0363200000414310,De Ruijterkade,1011AB,107,,5,308,1,Flat/appartement,7,0,,GM0363,Amsterdam,BU03630400,Oosterdokseiland,WK036304,Nieuwmarkt/Lastage,PV27,Noord-Holland ,POINT (4.90541865504259 52.37775355302933),1,0,0,0,14940,6408,1005,1,{woonfunctie}
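For readers following the new `bitArrayFilter` in `operations.go` above, the sketch below shows the underlying idea in isolation: one bit array per (column, value) pair, where bit N is set when item N carries that value, so combining several `match-` filters is a single bitwise AND before the surviving item labels are read back with `ToNums`. It is a minimal standalone sketch using the same Workiva bitarray package the diff already imports; the column names, item labels, and the `main` wrapper are illustrative assumptions, not part of the codebase.

```go
package main

import (
	"fmt"

	"github.com/Workiva/go-datastructures/bitarray"
)

func main() {
	// Illustrative only: one sparse bit array per (column, value) pair.
	// Bit N is set when item N has that value; the labels are made up.
	gemeenteAmsterdam := bitarray.NewSparseBitArray()
	woningTypeFlat := bitarray.NewSparseBitArray()

	for _, label := range []uint64{0, 1, 3} {
		gemeenteAmsterdam.SetBit(label)
	}
	for _, label := range []uint64{1, 2, 3, 4} {
		woningTypeFlat.SetBit(label)
	}

	// Two match filters combine as a bitwise AND, as bitArrayFilter does.
	combined := gemeenteAmsterdam.And(woningTypeFlat)

	// The set bits are the labels of the matching items: [1 3].
	fmt.Println(combined.ToNums())
}
```

As in the diff, only `match-` filters are combined this way; OR and exclude combinations remain TODOs in `bitArrayFilter`.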