diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index ee65ccb5..69cfc6ad 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -8,9 +8,10 @@ on: push: branches: [main, master] pull_request: - branches: [main, master] -name: R-CMD-check +name: R-CMD-check.yaml + +permissions: read-all jobs: R-CMD-check: @@ -25,24 +26,22 @@ jobs: - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} - # Use 3.6 to trigger usage of RTools35 - - {os: windows-latest, r: '3.6'} - # use 4.1 to check with rtools40's older compiler - - {os: windows-latest, r: '4.1'} - - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} - - {os: ubuntu-latest, r: 'oldrel-2'} - - {os: ubuntu-latest, r: 'oldrel-3'} - - {os: ubuntu-latest, r: 'oldrel-4'} + # use 4.0 or 4.1 to check with rtools40's older compiler + - {os: windows-latest, r: 'oldrel-4'} + + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: ubuntu-latest, r: 'oldrel-2'} + - {os: ubuntu-latest, r: 'oldrel-3'} + - {os: ubuntu-latest, r: 'oldrel-4'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 @@ -60,3 +59,4 @@ jobs: - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index ed7650c7..bfc9f4db 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -4,12 +4,13 @@ on: push: branches: [main, master] pull_request: - branches: [main, master] release: types: [published] workflow_dispatch: -name: pkgdown +name: pkgdown.yaml + +permissions: read-all jobs: pkgdown: @@ -22,7 
+23,7 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 @@ -41,7 +42,7 @@ jobs: - name: Deploy to GitHub pages 🚀 if: github.event_name != 'pull_request' - uses: JamesIves/github-pages-deploy-action@v4.4.1 + uses: JamesIves/github-pages-deploy-action@v4.5.0 with: clean: false branch: gh-pages diff --git a/.github/workflows/pr-commands.yaml b/.github/workflows/pr-commands.yaml index 71f335b3..2edd93f2 100644 --- a/.github/workflows/pr-commands.yaml +++ b/.github/workflows/pr-commands.yaml @@ -4,7 +4,9 @@ on: issue_comment: types: [created] -name: Commands +name: pr-commands.yaml + +permissions: read-all jobs: document: @@ -13,8 +15,10 @@ jobs: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/pr-fetch@v2 with: @@ -50,8 +54,10 @@ jobs: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/pr-fetch@v2 with: diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 27d45283..0ab748d6 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -4,9 +4,10 @@ on: push: branches: [main, master] pull_request: - branches: [main, master] -name: test-coverage +name: test-coverage.yaml + +permissions: read-all jobs: test-coverage: @@ -15,7 +16,7 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: @@ -23,28 +24,39 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::covr + extra-packages: any::covr, any::xml2 needs: coverage - name: Test coverage run: | - covr::codecov( + cov <- covr::package_coverage( quiet = 
FALSE, clean = FALSE, install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") ) + print(cov) + covr::to_cobertura(cov) shell: Rscript {0} + - uses: codecov/codecov-action@v5 + with: + # Fail if error if not on PR, or if on PR and token is given + fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} + files: ./cobertura.xml + plugins: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + - name: Show testthat output if: always() run: | ## -------------------------------------------------------------------- - find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true shell: bash - name: Upload test results if: failure() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: coverage-test-failures path: ${{ runner.temp }}/package diff --git a/README.Rmd b/README.Rmd index 08011c62..4f000083 100644 --- a/README.Rmd +++ b/README.Rmd @@ -18,7 +18,7 @@ knitr::opts_chunk$set( [![CRAN status](https://www.r-pkg.org/badges/version/haven)](https://cran.r-project.org/package=haven) [![R-CMD-check](https://github.com/tidyverse/haven/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidyverse/haven/actions/workflows/R-CMD-check.yaml) -[![Codecov test coverage](https://codecov.io/gh/tidyverse/haven/branch/main/graph/badge.svg)](https://app.codecov.io/gh/tidyverse/haven?branch=main) +[![Codecov test coverage](https://codecov.io/gh/tidyverse/haven/graph/badge.svg)](https://app.codecov.io/gh/tidyverse/haven) ## Overview diff --git a/README.md b/README.md index 3c454d7a..3492b802 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ status](https://www.r-pkg.org/badges/version/haven)](https://cran.r-project.org/package=haven) 
[![R-CMD-check](https://github.com/tidyverse/haven/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidyverse/haven/actions/workflows/R-CMD-check.yaml) [![Codecov test -coverage](https://codecov.io/gh/tidyverse/haven/branch/main/graph/badge.svg)](https://app.codecov.io/gh/tidyverse/haven?branch=main) +coverage](https://codecov.io/gh/tidyverse/haven/graph/badge.svg)](https://app.codecov.io/gh/tidyverse/haven) ## Overview diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 1ab93464..f2823b66 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -110,9 +110,9 @@ extern "C" SEXP _haven_write_xpt_(SEXP data, SEXP path, SEXP version, SEXP name, extern "C" { /* .Call calls */ -extern SEXP is_tagged_na_(void *, void *); -extern SEXP na_tag_(void *); -extern SEXP tagged_na_(void *); +extern SEXP is_tagged_na_(SEXP, SEXP); +extern SEXP na_tag_(SEXP); +extern SEXP tagged_na_(SEXP); static const R_CallMethodDef CallEntries[] = { {"_haven_df_parse_dta_file", (DL_FUNC) &_haven_df_parse_dta_file, 6}, diff --git a/src/readstat/readstat.h b/src/readstat/readstat.h index bf2c375e..c6ece530 100644 --- a/src/readstat/readstat.h +++ b/src/readstat/readstat.h @@ -104,11 +104,22 @@ typedef enum readstat_error_e { READSTAT_ERROR_TOO_FEW_COLUMNS, READSTAT_ERROR_TOO_MANY_COLUMNS, READSTAT_ERROR_NAME_IS_ZERO_LENGTH, - READSTAT_ERROR_BAD_TIMESTAMP_VALUE + READSTAT_ERROR_BAD_TIMESTAMP_VALUE, + READSTAT_ERROR_BAD_MR_STRING } readstat_error_t; const char *readstat_error_message(readstat_error_t error_code); +typedef struct mr_set_s { + char type; + char *name; + char *label; + int is_dichotomy; + int counted_value; + char **subvariables; + int num_subvars; +} mr_set_t; + typedef struct readstat_metadata_s { int64_t row_count; int64_t var_count; @@ -121,6 +132,8 @@ typedef struct readstat_metadata_s { const char *file_label; const char *file_encoding; unsigned int is64bit:1; + size_t multiple_response_sets_length; + mr_set_t *mr_sets; } readstat_metadata_t; /* If the row count is 
unknown (e.g. it's an XPORT or POR file, or an SAV @@ -138,6 +151,8 @@ readstat_endian_t readstat_get_endianness(readstat_metadata_t *metadata); const char *readstat_get_table_name(readstat_metadata_t *metadata); const char *readstat_get_file_label(readstat_metadata_t *metadata); const char *readstat_get_file_encoding(readstat_metadata_t *metadata); +const mr_set_t *readstat_get_multiple_response_sets(readstat_metadata_t *metadata); +size_t readstat_get_multiple_response_sets_length(readstat_metadata_t *metadata); typedef struct readstat_value_s { union { diff --git a/src/readstat/readstat_metadata.c b/src/readstat/readstat_metadata.c index 25345392..97877657 100644 --- a/src/readstat/readstat_metadata.c +++ b/src/readstat/readstat_metadata.c @@ -43,3 +43,11 @@ const char *readstat_get_file_encoding(readstat_metadata_t *metadata) { const char *readstat_get_table_name(readstat_metadata_t *metadata) { return metadata->table_name; } + +size_t readstat_get_multiple_response_sets_length(readstat_metadata_t *metadata) { + return metadata->multiple_response_sets_length; +} + +const mr_set_t *readstat_get_multiple_response_sets(readstat_metadata_t *metadata) { + return metadata->mr_sets; +} diff --git a/src/readstat/sas/ieee.c b/src/readstat/sas/ieee.c index 4e7bc28b..b74fc680 100644 --- a/src/readstat/sas/ieee.c +++ b/src/readstat/sas/ieee.c @@ -366,7 +366,7 @@ void ieee2xpt(unsigned char *ieee, unsigned char *xport) { shift = (int) (ieee_exp = (int)(((ieee1 >> 16) & 0x7ff0) >> 4) - 1023) & 3; - /* the ieee format has an implied "1" immdeiately to the left */ + /* the ieee format has an implied "1" immediately to the left */ /* of the binary point. Show it in here. */ xport1 |= 0x00100000; if (shift) @@ -377,7 +377,7 @@ void ieee2xpt(unsigned char *ieee, unsigned char *xport) { /* from the lower half that would have been shifted in (if */ /* we could shift a double). The shift count can never */ /* exceed 3, so all we care about are the high order 3 */ - /* bits. 
We don't want sign extention so make sure it's an */ + /* bits. We don't want sign extension so make sure it's an */ /* unsigned char. We'll shift either5, 6, or 7 places to */ /* keep 3, 2, or 1 bits. After that, shift the second half */ /* of the number the right number of places. We always get */ @@ -391,9 +391,9 @@ void ieee2xpt(unsigned char *ieee, unsigned char *xport) { /* Now set the ibm exponent and the sign of the fraction. The */ /* power of 2 ieee exponent must be divided by 4 and made */ - /* excess 64 (we add 65 here because of the poisition of the */ + /* excess 64 (we add 65 here because of the position of the */ /* fraction bits, essentially 4 positions lower than they */ - /* should be so we incrment the ibm exponent). */ + /* should be so we increment the ibm exponent). */ xport1 |= diff --git a/src/readstat/sas/readstat_sas.c b/src/readstat/sas/readstat_sas.c index 6d93bd08..5dfd583e 100644 --- a/src/readstat/sas/readstat_sas.c +++ b/src/readstat/sas/readstat_sas.c @@ -124,7 +124,8 @@ static time_t sas_epoch(void) { return - 3653 * 86400; // seconds between 01-01-1960 and 01-01-1970 } -static time_t sas_convert_time(double time, time_t epoch) { +static time_t sas_convert_time(double time, double time_diff, time_t epoch) { + time -= time_diff; time += epoch; if (isnan(time)) return 0; @@ -212,7 +213,7 @@ readstat_error_t sas_read_header(readstat_io_t *io, sas_header_info_t *hinfo, goto cleanup; } - double creation_time, modification_time; + double creation_time, modification_time, creation_time_diff, modification_time_diff; if (io->read(&creation_time, sizeof(double), io->io_ctx) < sizeof(double)) { retval = READSTAT_ERROR_READ; @@ -228,13 +229,22 @@ readstat_error_t sas_read_header(readstat_io_t *io, sas_header_info_t *hinfo, if (bswap) modification_time = byteswap_double(modification_time); - hinfo->creation_time = sas_convert_time(creation_time, epoch); - hinfo->modification_time = sas_convert_time(modification_time, epoch); - - if 
(io->seek(16, READSTAT_SEEK_CUR, io->io_ctx) == -1) { - retval = READSTAT_ERROR_SEEK; + if (io->read(&creation_time_diff, sizeof(double), io->io_ctx) < sizeof(double)) { + retval = READSTAT_ERROR_READ; goto cleanup; } + if (bswap) + creation_time_diff = byteswap_double(creation_time_diff); + + if (io->read(&modification_time_diff, sizeof(double), io->io_ctx) < sizeof(double)) { + retval = READSTAT_ERROR_READ; + goto cleanup; + } + if (bswap) + modification_time_diff = byteswap_double(modification_time_diff); + + hinfo->creation_time = sas_convert_time(creation_time, creation_time_diff, epoch); + hinfo->modification_time = sas_convert_time(modification_time, modification_time_diff, epoch); uint32_t header_size, page_size; @@ -299,9 +309,9 @@ readstat_error_t sas_read_header(readstat_io_t *io, sas_header_info_t *hinfo, retval = READSTAT_ERROR_READ; goto cleanup; } - char major; + char major, revision_tag; int minor, revision; - if (sscanf(header_end.release, "%c.%04dM%1d", &major, &minor, &revision) != 3) { + if (sscanf(header_end.release, "%c.%04d%c%1d", &major, &minor, &revision_tag, &revision) != 4) { retval = READSTAT_ERROR_PARSE; goto cleanup; } @@ -316,6 +326,11 @@ readstat_error_t sas_read_header(readstat_io_t *io, sas_header_info_t *hinfo, retval = READSTAT_ERROR_PARSE; goto cleanup; } + // revision_tag is usually M, but J has been observed in the wild (not created with SAS?) 
+ if (revision_tag != 'M' && revision_tag != 'J') { + retval = READSTAT_ERROR_PARSE; + goto cleanup; + } hinfo->minor_version = minor; hinfo->revision = revision; diff --git a/src/readstat/sas/readstat_sas7bcat_read.c b/src/readstat/sas/readstat_sas7bcat_read.c index b3e14f0e..20eddb67 100644 --- a/src/readstat/sas/readstat_sas7bcat_read.c +++ b/src/readstat/sas/readstat_sas7bcat_read.c @@ -60,7 +60,7 @@ static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, siz /* Pass 1 -- find out the offset of the labels */ for (i=0; i value_labels_len || sas_read2(&lbp1[2], ctx->bswap) < 0) { + if (&lbp1[4] - value_start > value_labels_len || sas_read2(&lbp1[2], ctx->bswap) < 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } @@ -107,16 +107,26 @@ static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, siz sas_assign_tag(&value, (val >> 40)); } else { memcpy(&dval, &val, 8); - dval *= -1.0; + if (dval > 0.0) { + val = ~val; + memcpy(&dval, &val, 8); + } else { + dval *= -1.0; + } } value.v.double_value = dval; } size_t label_len = sas_read2(&lbp2[8], ctx->bswap); - if (&lbp2[10] + label_len - value_start > value_labels_len) { + if (&lbp2[10] > value_start + value_labels_len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } + /* Some labels seem to overflow the reported block length, truncate it */ + /* (Observed with formats.sasbcat from GSS2021, produced with 9.0401M6X64_SR12R2 */ + if (label_len > value_start + value_labels_len - &lbp2[10]) { + label_len = value_start + value_labels_len - &lbp2[10]; + } if (ctx->value_label_handler) { label = realloc(label, 4 * label_len + 1); retval = readstat_convert(label, 4 * label_len + 1, diff --git a/src/readstat/sas/readstat_sas7bcat_write.c b/src/readstat/sas/readstat_sas7bcat_write.c index 6544798c..da9a84da 100644 --- a/src/readstat/sas/readstat_sas7bcat_write.c +++ b/src/readstat/sas/readstat_sas7bcat_write.c @@ -46,7 +46,8 @@ static sas7bcat_block_t 
*sas7bcat_block_for_label_set(readstat_label_set_t *r_la memcpy(&block->data[38], &count, sizeof(int32_t)); memcpy(&block->data[42], &count, sizeof(int32_t)); if (name_len > 8) { - block->data[2] = (char)0x80; + int16_t flags = 0x80; + memcpy(&block->data[2], &flags, sizeof(int16_t)); memcpy(&block->data[8], name, 8); memset(&block->data[106], ' ', 32); @@ -63,7 +64,8 @@ static sas7bcat_block_t *sas7bcat_block_for_label_set(readstat_label_set_t *r_la for (j=0; j<r_label_set->value_labels_count; j++) { readstat_value_label_t *value_label = readstat_get_value_label(r_label_set, j); - lbp1[2] = 24; // size - 6 + int16_t value_entry_len = 24; // size - 6 + memcpy(&lbp1[2], &value_entry_len, sizeof(int16_t)); int32_t index = j; memcpy(&lbp1[10], &index, sizeof(int32_t)); if (r_label_set->type == READSTAT_TYPE_STRING) { @@ -74,8 +76,14 @@ static sas7bcat_block_t *sas7bcat_block_for_label_set(readstat_label_set_t *r_la memcpy(&lbp1[14], value_label->string_key, string_len); } else { uint64_t big_endian_value; - double double_value = -1.0 * value_label->double_key; - memcpy(&big_endian_value, &double_value, sizeof(double)); + double double_value = value_label->double_key; + if (double_value >= 0.0) { + double_value *= -1.0; + memcpy(&big_endian_value, &double_value, sizeof(double)); + } else { + memcpy(&big_endian_value, &double_value, sizeof(double)); + big_endian_value = ~big_endian_value; + } if (machine_is_little_endian()) { big_endian_value = byteswap8(big_endian_value); } @@ -86,7 +94,7 @@ static sas7bcat_block_t *sas7bcat_block_for_label_set(readstat_label_set_t *r_la memcpy(&lbp2[8], &label_len, sizeof(int16_t)); memcpy(&lbp2[10], value_label->label, label_len); - lbp1 += 30; + lbp1 += 6 + value_entry_len; lbp2 += 8 + 2 + value_label->label_len + 1; } @@ -138,16 +146,15 @@ static readstat_error_t sas7bcat_begin_data(void *writer_ctx) { // Page 1 char *xlsr = &page[856]; - int16_t block_idx, block_off; - block_idx = 4; - block_off = 16; + int32_t block_idx = 4; + int16_t
block_off = 16; for (i=0; ilabel_sets_count; i++) { if (xlsr + 212 > page + hinfo->page_size) break; memcpy(&xlsr[0], "XLSR", 4); - memcpy(&xlsr[4], &block_idx, sizeof(int16_t)); + memcpy(&xlsr[4], &block_idx, sizeof(int32_t)); memcpy(&xlsr[8], &block_off, sizeof(int16_t)); xlsr[50] = 'O'; diff --git a/src/readstat/sas/readstat_sas7bdat_read.c b/src/readstat/sas/readstat_sas7bdat_read.c index 1a745a79..20c76a8b 100644 --- a/src/readstat/sas/readstat_sas7bdat_read.c +++ b/src/readstat/sas/readstat_sas7bdat_read.c @@ -20,7 +20,8 @@ typedef struct col_info_s { uint64_t offset; uint32_t width; int type; - int format_len; + int format_width; + int format_digits; } col_info_t; typedef struct subheader_pointer_s { @@ -376,8 +377,10 @@ static readstat_error_t sas7bdat_parse_column_format_subheader(const char *subhe if ((retval = sas7bdat_realloc_col_info(ctx, ctx->col_formats_count)) != READSTAT_OK) goto cleanup; - if (ctx->u64) - ctx->col_info[ctx->col_formats_count-1].format_len = sas_read2(&subheader[24], ctx->bswap); + if (ctx->u64) { + ctx->col_info[ctx->col_formats_count-1].format_width = sas_read2(&subheader[24], ctx->bswap); + ctx->col_info[ctx->col_formats_count-1].format_digits = sas_read2(&subheader[26], ctx->bswap); + } ctx->col_info[ctx->col_formats_count-1].format_ref = sas7bdat_parse_text_ref( ctx->u64 ? 
&subheader[46] : &subheader[34], ctx); ctx->col_info[ctx->col_formats_count-1].label_ref = sas7bdat_parse_text_ref( @@ -693,8 +696,13 @@ static readstat_variable_t *sas7bdat_init_variable(sas7bdat_ctx_t *ctx, int i, goto cleanup; } size_t len = strlen(variable->format); - if (len && ctx->col_info[i].format_len) { - snprintf(variable->format + len, sizeof(variable->format) - len, "%d", ctx->col_info[i].format_len); + if (len && ctx->col_info[i].format_width) { + len += snprintf(variable->format + len, sizeof(variable->format) - len, + "%d", ctx->col_info[i].format_width); + } + if (len && ctx->col_info[i].format_digits) { + len += snprintf(variable->format + len, sizeof(variable->format) - len, + ".%d", ctx->col_info[i].format_digits); } if ((retval = sas7bdat_copy_text_ref(variable->label, sizeof(variable->label), ctx->col_info[i].label_ref, ctx)) != READSTAT_OK) { diff --git a/src/readstat/sas/readstat_xport_write.c b/src/readstat/sas/readstat_xport_write.c index 3d312dab..1f7b317b 100644 --- a/src/readstat/sas/readstat_xport_write.c +++ b/src/readstat/sas/readstat_xport_write.c @@ -10,7 +10,7 @@ #include "readstat_xport_parse_format.h" #include "ieee.h" -#define XPORT_DEFAULT_VERISON 8 +#define XPORT_DEFAULT_VERSION 8 #define RECORD_LEN 80 #if defined _MSC_VER @@ -53,8 +53,8 @@ static readstat_error_t xport_write_header_record_v8(readstat_writer_t *writer, xport_header_record_t *xrecord) { char record[RECORD_LEN+1]; snprintf(record, sizeof(record), - "HEADER RECORD*******%-8sHEADER RECORD!!!!!!!%-30d", - xrecord->name, xrecord->num1); + "HEADER RECORD*******%-8sHEADER RECORD!!!!!!!" 
"%15d" "%15d", + xrecord->name, xrecord->num1, xrecord->num2); return xport_write_record(writer, record); } @@ -356,12 +356,16 @@ static readstat_error_t xport_write_namestr_header_record(readstat_writer_t *wri } static readstat_error_t xport_write_obs_header_record(readstat_writer_t *writer) { + if (writer->version == 8) { + xport_header_record_t xrecord = { + .name = "OBSV8", + .num1 = writer->row_count + }; + return xport_write_header_record_v8(writer, &xrecord); + } xport_header_record_t xrecord = { .name = "OBS" }; - if (writer->version == 8) { - strcpy(xrecord.name, "OBSV8"); - } return xport_write_header_record(writer, &xrecord); } @@ -531,7 +535,7 @@ static readstat_error_t xport_metadata_ok(void *writer_ctx) { readstat_error_t readstat_begin_writing_xport(readstat_writer_t *writer, void *user_ctx, long row_count) { if (writer->version == 0) - writer->version = XPORT_DEFAULT_VERISON; + writer->version = XPORT_DEFAULT_VERSION; writer->callbacks.metadata_ok = &xport_metadata_ok; writer->callbacks.write_int8 = &xport_write_int8; diff --git a/src/readstat/spss/readstat_por_read.c b/src/readstat/spss/readstat_por_read.c index 44dc0f48..9f6f8924 100644 --- a/src/readstat/spss/readstat_por_read.c +++ b/src/readstat/spss/readstat_por_read.c @@ -682,8 +682,7 @@ readstat_error_t read_version_and_timestamp(por_ctx_t *ctx) { goto cleanup; } if (sscanf(string, "%02d%02d%02d", &timestamp.tm_hour, &timestamp.tm_min, &timestamp.tm_sec) != 3) { - retval = READSTAT_ERROR_BAD_TIMESTAMP_STRING; - goto cleanup; + /* optional */ } timestamp.tm_year -= 1900; diff --git a/src/readstat/spss/readstat_por_write.c b/src/readstat/spss/readstat_por_write.c index cae2af01..270b0d41 100644 --- a/src/readstat/spss/readstat_por_write.c +++ b/src/readstat/spss/readstat_por_write.c @@ -201,11 +201,10 @@ static readstat_error_t por_emit_header(readstat_writer_t *writer, por_write_ctx size_t file_label_len = strlen(writer->file_label); char vanity[5][40]; memset(vanity, '0', sizeof(vanity)); +
memset(vanity[1], ' ', sizeof(vanity[1])); memcpy(vanity[1], "ASCII SPSS PORT FILE", 20); - strncpy(vanity[1] + 20, writer->file_label, 20); - if (file_label_len < 20) - memset(vanity[1] + 20 + file_label_len, ' ', 20 - file_label_len); + memcpy(vanity[1] + 20, writer->file_label, file_label_len > 20 ? 20 : file_label_len); por_write_bytes(writer, vanity, sizeof(vanity)); diff --git a/src/readstat/spss/readstat_sav.c b/src/readstat/spss/readstat_sav.c index 18a59168..62188bba 100644 --- a/src/readstat/spss/readstat_sav.c +++ b/src/readstat/spss/readstat_sav.c @@ -59,6 +59,8 @@ sav_ctx_t *sav_ctx_init(sav_file_header_record_t *header, readstat_io_t *io) { return NULL; } + ctx->mr_sets = NULL; + ctx->io = io; return ctx; @@ -89,6 +91,25 @@ void sav_ctx_free(sav_ctx_t *ctx) { if (ctx->variable_display_values) { free(ctx->variable_display_values); } + if (ctx->mr_sets) { + for (size_t i = 0; i < ctx->multiple_response_sets_length; i++) { + if (ctx->mr_sets[i].name) { + free(ctx->mr_sets[i].name); + } + if (ctx->mr_sets[i].label) { + free(ctx->mr_sets[i].label); + } + if (ctx->mr_sets[i].subvariables) { + for (size_t j = 0; j < ctx->mr_sets[i].num_subvars; j++) { + if (ctx->mr_sets[i].subvariables[j]) { + free(ctx->mr_sets[i].subvariables[j]); + } + } + free(ctx->mr_sets[i].subvariables); + } + } + free(ctx->mr_sets); + } free(ctx); } diff --git a/src/readstat/spss/readstat_sav.h b/src/readstat/spss/readstat_sav.h index c4b68de5..e417ac45 100644 --- a/src/readstat/spss/readstat_sav.h +++ b/src/readstat/spss/readstat_sav.h @@ -3,6 +3,7 @@ // #include "readstat_spss.h" +#include "../readstat.h" #pragma pack(push, 1) @@ -100,6 +101,9 @@ typedef struct sav_ctx_s { uint64_t lowest_double; uint64_t highest_double; + size_t multiple_response_sets_length; + mr_set_t *mr_sets; + double bias; int format_version; @@ -117,6 +121,7 @@ typedef struct sav_ctx_s { #define SAV_RECORD_SUBTYPE_INTEGER_INFO 3 #define SAV_RECORD_SUBTYPE_FP_INFO 4 +#define 
SAV_RECORD_SUBTYPE_MULTIPLE_RESPONSE_SETS 7 #define SAV_RECORD_SUBTYPE_PRODUCT_INFO 10 #define SAV_RECORD_SUBTYPE_VAR_DISPLAY 11 #define SAV_RECORD_SUBTYPE_LONG_VAR_NAME 13 diff --git a/src/readstat/spss/readstat_sav_parse.c b/src/readstat/spss/readstat_sav_parse.c index 93df3492..10b3ac6c 100644 --- a/src/readstat/spss/readstat_sav_parse.c +++ b/src/readstat/spss/readstat_sav_parse.c @@ -1,3 +1,4 @@ + #line 1 "src/spss/readstat_sav_parse.rl" #include #include @@ -13,309 +14,263 @@ typedef struct varlookup { - char name[8*4+1]; - int index; + char name[8*4+1]; + int index; } varlookup_t; static int compare_key_varlookup(const void *elem1, const void *elem2) { - const char *key = (const char *)elem1; - const varlookup_t *v = (const varlookup_t *)elem2; - return strcasecmp(key, v->name); + const char *key = (const char *)elem1; + const varlookup_t *v = (const varlookup_t *)elem2; + return strcasecmp(key, v->name); } static int compare_varlookups(const void *elem1, const void *elem2) { - const varlookup_t *v1 = (const varlookup_t *)elem1; - const varlookup_t *v2 = (const varlookup_t *)elem2; - return strcasecmp(v1->name, v2->name); + const varlookup_t *v1 = (const varlookup_t *)elem1; + const varlookup_t *v2 = (const varlookup_t *)elem2; + return strcasecmp(v1->name, v2->name); } static int count_vars(sav_ctx_t *ctx) { - int i; - spss_varinfo_t *last_info = NULL; - int var_count = 0; - for (i=0; i<ctx->var_index; i++) { - spss_varinfo_t *info = ctx->varinfo[i]; - if (last_info == NULL || strcmp(info->name, last_info->name) != 0) { - var_count++; - } - last_info = info; - } - return var_count; + int i; + spss_varinfo_t *last_info = NULL; + int var_count = 0; + for (i=0; i<ctx->var_index; i++) { + spss_varinfo_t *info = ctx->varinfo[i]; + if (last_info == NULL || strcmp(info->name, last_info->name) != 0) { + var_count++; + } + last_info = info; + } + return var_count; } static varlookup_t *build_lookup_table(int var_count, sav_ctx_t *ctx) { - varlookup_t *table =
readstat_malloc(var_count * sizeof(varlookup_t)); - int offset = 0; - int i; - spss_varinfo_t *last_info = NULL; - for (i=0; i<ctx->var_index; i++) { - spss_varinfo_t *info = ctx->varinfo[i]; - - if (last_info == NULL || strcmp(info->name, last_info->name) != 0) { - varlookup_t *entry = &table[offset++]; - - memcpy(entry->name, info->name, sizeof(info->name)); - entry->index = info->index; - } - last_info = info; - } - qsort(table, var_count, sizeof(varlookup_t), &compare_varlookups); - return table; + varlookup_t *table = readstat_malloc(var_count * sizeof(varlookup_t)); + int offset = 0; + int i; + spss_varinfo_t *last_info = NULL; + for (i=0; i<ctx->var_index; i++) { + spss_varinfo_t *info = ctx->varinfo[i]; + + if (last_info == NULL || strcmp(info->name, last_info->name) != 0) { + varlookup_t *entry = &table[offset++]; + + memcpy(entry->name, info->name, sizeof(info->name)); + entry->index = info->index; + } + last_info = info; + } + qsort(table, var_count, sizeof(varlookup_t), &compare_varlookups); + return table; } -#line 68 "src/spss/readstat_sav_parse.c" -static const signed char _sav_long_variable_parse_actions[] = { - 0, 1, 1, 1, 5, 2, 2, 0, - 3, 6, 4, 3, 0 +#line 69 "src/spss/readstat_sav_parse.c" +static const char _sav_long_variable_parse_actions[] = { + 0, 1, 1, 1, 5, 2, 2, 0, + 3, 6, 4, 3 }; static const short _sav_long_variable_parse_key_offsets[] = { - 0, 0, 5, 19, 33, 47, 61, 75, - 89, 103, 104, 108, 113, 118, 123, 128, - 133, 138, 143, 148, 153, 158, 163, 168, - 173, 178, 183, 188, 193, 198, 203, 208, - 213, 218, 223, 228, 233, 238, 243, 248, - 253, 258, 263, 268, 273, 278, 283, 288, - 293, 298, 303, 308, 313, 318, 323, 328, - 333, 338, 343, 348, 353, 358, 363, 368, - 373, 378, 383, 388, 393, 398, 403, 408, - 413, 418, 423, 428, 0 + 0, 0, 5, 19, 33, 47, 61, 75, + 89, 103, 104, 108, 113, 118, 123, 128, + 133, 138, 143, 148, 153, 158, 163, 168, + 173, 178, 183, 188, 193, 198, 203, 208, + 213, 218, 223, 228, 233, 238, 243, 248, + 253, 258, 263, 268, 273, 278,
283, 288, + 293, 298, 303, 308, 313, 318, 323, 328, + 333, 338, 343, 348, 353, 358, 363, 368, + 373, 378, 383, 388, 393, 398, 403, 408, + 413, 418, 423, 428 }; static const unsigned char _sav_long_variable_parse_trans_keys[] = { - 255u, 0u, 63u, 91u, 127u, 47u, 61u, 96u, - 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, - 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, - 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, - 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, - 45u, 58u, 63u, 91u, 94u, 123u, 127u, 47u, - 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, - 63u, 91u, 94u, 123u, 127u, 47u, 61u, 96u, - 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, - 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, - 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, - 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, - 45u, 58u, 63u, 91u, 94u, 123u, 127u, 61u, - 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, - 31u, 255u, 0u, 63u, 91u, 127u, 9u, 127u, - 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, - 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, - 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, - 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, - 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, - 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, - 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, - 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, - 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, - 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, - 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, - 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, - 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, - 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, - 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, - 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, - 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, - 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, - 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, - 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, - 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, - 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, - 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, - 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, - 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, - 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, - 9u, 127u, 255u, 0u, 31u, 9u, 
127u, 255u, - 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, - 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, - 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, - 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, - 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, - 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, - 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, - 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, - 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, - 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, - 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, - 127u, 255u, 0u, 31u, 9u, 0u + 255u, 0u, 63u, 91u, 127u, 47u, 61u, 96u, + 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, + 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, + 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, + 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, + 45u, 58u, 63u, 91u, 94u, 123u, 127u, 47u, + 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, + 63u, 91u, 94u, 123u, 127u, 47u, 61u, 96u, + 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, + 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, + 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, + 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, + 45u, 58u, 63u, 91u, 94u, 123u, 127u, 61u, + 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, + 31u, 255u, 0u, 63u, 91u, 127u, 9u, 127u, + 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, + 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, + 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, + 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, + 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, + 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, + 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, + 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, + 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, + 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, + 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, + 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, + 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, + 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, + 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, + 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, + 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, + 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, + 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, + 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, + 255u, 
0u, 31u, 9u, 127u, 255u, 0u, 31u, + 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, + 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, + 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, + 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, + 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, + 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, + 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, + 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, + 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, + 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, + 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, + 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, + 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, + 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, + 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, + 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, + 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, + 127u, 255u, 0u, 31u, 9u, 0 }; -static const signed char _sav_long_variable_parse_single_lengths[] = { - 0, 1, 4, 4, 4, 4, 4, 4, - 4, 1, 2, 3, 1, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 1, 0 +static const char _sav_long_variable_parse_single_lengths[] = { + 0, 1, 4, 4, 4, 4, 4, 4, + 4, 1, 2, 3, 1, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 1 }; -static const signed char _sav_long_variable_parse_range_lengths[] = { - 0, 2, 5, 5, 5, 5, 5, 5, - 5, 0, 1, 1, 2, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 0, 0 +static const char _sav_long_variable_parse_range_lengths[] = { + 0, 2, 5, 5, 5, 5, 5, 5, + 5, 0, 1, 1, 2, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 
1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0 }; static const short _sav_long_variable_parse_index_offsets[] = { - 0, 0, 4, 14, 24, 34, 44, 54, - 64, 74, 76, 80, 85, 89, 94, 99, - 104, 109, 114, 119, 124, 129, 134, 139, - 144, 149, 154, 159, 164, 169, 174, 179, - 184, 189, 194, 199, 204, 209, 214, 219, - 224, 229, 234, 239, 244, 249, 254, 259, - 264, 269, 274, 279, 284, 289, 294, 299, - 304, 309, 314, 319, 324, 329, 334, 339, - 344, 349, 354, 359, 364, 369, 374, 379, - 384, 389, 394, 399, 0 + 0, 0, 4, 14, 24, 34, 44, 54, + 64, 74, 76, 80, 85, 89, 94, 99, + 104, 109, 114, 119, 124, 129, 134, 139, + 144, 149, 154, 159, 164, 169, 174, 179, + 184, 189, 194, 199, 204, 209, 214, 219, + 224, 229, 234, 239, 244, 249, 254, 259, + 264, 269, 274, 279, 284, 289, 294, 299, + 304, 309, 314, 319, 324, 329, 334, 339, + 344, 349, 354, 359, 364, 369, 374, 379, + 384, 389, 394, 399 }; -static const signed char _sav_long_variable_parse_cond_targs[] = { - 0, 0, 0, 2, 0, 10, 0, 0, - 0, 0, 0, 0, 0, 3, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 4, - 0, 10, 0, 0, 0, 0, 0, 0, - 0, 5, 0, 10, 0, 0, 0, 0, - 0, 0, 0, 6, 0, 10, 0, 0, - 0, 0, 0, 0, 0, 7, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 8, - 0, 10, 0, 0, 0, 0, 0, 0, - 0, 9, 10, 0, 0, 0, 0, 11, - 12, 0, 0, 0, 13, 0, 0, 0, - 2, 12, 0, 0, 0, 14, 12, 0, - 0, 0, 15, 12, 0, 0, 0, 16, - 12, 0, 0, 0, 17, 12, 0, 0, - 0, 18, 12, 0, 0, 0, 19, 12, - 0, 0, 0, 20, 12, 0, 0, 0, - 21, 12, 0, 0, 0, 22, 12, 0, - 0, 0, 23, 12, 0, 0, 0, 24, - 12, 0, 0, 0, 25, 12, 0, 0, - 0, 26, 12, 0, 0, 0, 27, 12, - 0, 0, 0, 28, 12, 0, 0, 0, - 29, 12, 0, 0, 0, 30, 12, 0, - 0, 0, 31, 12, 0, 0, 0, 32, - 12, 0, 0, 0, 33, 12, 0, 0, - 0, 34, 12, 0, 0, 0, 35, 12, - 0, 0, 0, 36, 12, 0, 0, 0, - 37, 12, 0, 0, 0, 38, 12, 0, - 0, 0, 39, 12, 0, 0, 0, 40, - 12, 0, 0, 0, 41, 12, 0, 0, - 0, 42, 12, 0, 0, 0, 43, 12, - 0, 0, 0, 44, 12, 0, 0, 0, - 45, 12, 0, 0, 0, 46, 12, 0, - 0, 0, 47, 12, 0, 0, 0, 48, - 12, 0, 0, 0, 49, 12, 0, 0, - 0, 50, 12, 0, 0, 0, 51, 12, - 0, 0, 0, 52, 12, 0, 0, 0, - 53, 12, 0, 0, 0, 54, 
12, 0, - 0, 0, 55, 12, 0, 0, 0, 56, - 12, 0, 0, 0, 57, 12, 0, 0, - 0, 58, 12, 0, 0, 0, 59, 12, - 0, 0, 0, 60, 12, 0, 0, 0, - 61, 12, 0, 0, 0, 62, 12, 0, - 0, 0, 63, 12, 0, 0, 0, 64, - 12, 0, 0, 0, 65, 12, 0, 0, - 0, 66, 12, 0, 0, 0, 67, 12, - 0, 0, 0, 68, 12, 0, 0, 0, - 69, 12, 0, 0, 0, 70, 12, 0, - 0, 0, 71, 12, 0, 0, 0, 72, - 12, 0, 0, 0, 73, 12, 0, 0, - 0, 74, 12, 0, 0, 0, 75, 12, - 0, 0, 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 28, 29, 30, - 31, 32, 33, 34, 35, 36, 37, 38, - 39, 40, 41, 42, 43, 44, 45, 46, - 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, 57, 58, 59, 60, 61, 62, - 63, 64, 65, 66, 67, 68, 69, 70, - 71, 72, 73, 74, 75, 0 +static const char _sav_long_variable_parse_indicies[] = { + 1, 1, 1, 0, 1, 3, 1, 1, + 1, 1, 1, 1, 1, 2, 1, 3, + 1, 1, 1, 1, 1, 1, 1, 4, + 1, 3, 1, 1, 1, 1, 1, 1, + 1, 5, 1, 3, 1, 1, 1, 1, + 1, 1, 1, 6, 1, 3, 1, 1, + 1, 1, 1, 1, 1, 7, 1, 3, + 1, 1, 1, 1, 1, 1, 1, 8, + 1, 3, 1, 1, 1, 1, 1, 1, + 1, 9, 3, 1, 1, 1, 1, 10, + 11, 1, 1, 1, 12, 1, 1, 1, + 0, 11, 1, 1, 1, 13, 11, 1, + 1, 1, 14, 11, 1, 1, 1, 15, + 11, 1, 1, 1, 16, 11, 1, 1, + 1, 17, 11, 1, 1, 1, 18, 11, + 1, 1, 1, 19, 11, 1, 1, 1, + 20, 11, 1, 1, 1, 21, 11, 1, + 1, 1, 22, 11, 1, 1, 1, 23, + 11, 1, 1, 1, 24, 11, 1, 1, + 1, 25, 11, 1, 1, 1, 26, 11, + 1, 1, 1, 27, 11, 1, 1, 1, + 28, 11, 1, 1, 1, 29, 11, 1, + 1, 1, 30, 11, 1, 1, 1, 31, + 11, 1, 1, 1, 32, 11, 1, 1, + 1, 33, 11, 1, 1, 1, 34, 11, + 1, 1, 1, 35, 11, 1, 1, 1, + 36, 11, 1, 1, 1, 37, 11, 1, + 1, 1, 38, 11, 1, 1, 1, 39, + 11, 1, 1, 1, 40, 11, 1, 1, + 1, 41, 11, 1, 1, 1, 42, 11, + 1, 1, 1, 43, 11, 1, 1, 1, + 44, 11, 1, 1, 1, 45, 11, 1, + 1, 1, 46, 11, 1, 1, 1, 47, + 11, 1, 1, 1, 48, 11, 1, 1, + 1, 49, 11, 1, 1, 1, 50, 11, + 1, 1, 1, 51, 11, 1, 1, 1, + 52, 11, 1, 1, 1, 53, 11, 1, + 1, 1, 54, 11, 1, 1, 1, 55, + 11, 1, 1, 1, 56, 11, 1, 1, + 1, 57, 11, 1, 1, 1, 58, 11, + 1, 1, 1, 59, 11, 1, 1, 1, + 60, 11, 1, 1, 1, 61, 11, 1, + 1, 1, 62, 11, 1, 1, 1, 63, + 11, 1, 1, 
1, 64, 11, 1, 1, + 1, 65, 11, 1, 1, 1, 66, 11, + 1, 1, 1, 67, 11, 1, 1, 1, + 68, 11, 1, 1, 1, 69, 11, 1, + 1, 1, 70, 11, 1, 1, 1, 71, + 11, 1, 1, 1, 72, 11, 1, 1, + 1, 73, 11, 1, 1, 1, 74, 11, + 1, 0 }; -static const signed char _sav_long_variable_parse_cond_actions[] = { - 0, 0, 0, 1, 0, 5, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 5, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 5, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 5, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 5, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 5, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 5, 0, 0, 0, 0, 0, 0, - 0, 0, 5, 0, 0, 0, 0, 3, - 8, 0, 0, 0, 0, 0, 0, 0, - 1, 8, 0, 0, 0, 0, 8, 0, - 0, 0, 0, 8, 0, 0, 0, 0, - 8, 0, 0, 0, 0, 8, 0, 0, - 0, 0, 8, 0, 0, 0, 0, 8, - 0, 0, 0, 0, 8, 0, 0, 0, - 0, 8, 0, 0, 0, 0, 8, 0, - 0, 0, 0, 8, 0, 0, 0, 0, - 8, 0, 0, 0, 0, 8, 0, 0, - 0, 0, 8, 0, 0, 0, 0, 8, - 0, 0, 0, 0, 8, 0, 0, 0, - 0, 8, 0, 0, 0, 0, 8, 0, - 0, 0, 0, 8, 0, 0, 0, 0, - 8, 0, 0, 0, 0, 8, 0, 0, - 0, 0, 8, 0, 0, 0, 0, 8, - 0, 0, 0, 0, 8, 0, 0, 0, - 0, 8, 0, 0, 0, 0, 8, 0, - 0, 0, 0, 8, 0, 0, 0, 0, - 8, 0, 0, 0, 0, 8, 0, 0, - 0, 0, 8, 0, 0, 0, 0, 8, - 0, 0, 0, 0, 8, 0, 0, 0, - 0, 8, 0, 0, 0, 0, 8, 0, - 0, 0, 0, 8, 0, 0, 0, 0, - 8, 0, 0, 0, 0, 8, 0, 0, - 0, 0, 8, 0, 0, 0, 0, 8, - 0, 0, 0, 0, 8, 0, 0, 0, - 0, 8, 0, 0, 0, 0, 8, 0, - 0, 0, 0, 8, 0, 0, 0, 0, - 8, 0, 0, 0, 0, 8, 0, 0, - 0, 0, 8, 0, 0, 0, 0, 8, - 0, 0, 0, 0, 8, 0, 0, 0, - 0, 8, 0, 0, 0, 0, 8, 0, - 0, 0, 0, 8, 0, 0, 0, 0, - 8, 0, 0, 0, 0, 8, 0, 0, - 0, 0, 8, 0, 0, 0, 0, 8, - 0, 0, 0, 0, 8, 0, 0, 0, - 0, 8, 0, 0, 0, 0, 8, 0, - 0, 0, 0, 8, 0, 0, 0, 0, - 8, 0, 0, 0, 0, 8, 0, 0, - 0, 0, 8, 0, 0, 0, 0, 8, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 8, 0, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 0 +static const char _sav_long_variable_parse_trans_targs[] = { + 2, 0, 3, 10, 4, 5, 6, 7, + 8, 9, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 
29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, + 57, 58, 59, 60, 61, 62, 63, 64, + 65, 66, 67, 68, 69, 70, 71, 72, + 73, 74, 75 }; -static const short _sav_long_variable_parse_eof_trans[] = { - 402, 403, 404, 405, 406, 407, 408, 409, - 410, 411, 412, 413, 414, 415, 416, 417, - 418, 419, 420, 421, 422, 423, 424, 425, - 426, 427, 428, 429, 430, 431, 432, 433, - 434, 435, 436, 437, 438, 439, 440, 441, - 442, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, - 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 470, 471, 472, 473, - 474, 475, 476, 477, 0 +static const char _sav_long_variable_parse_trans_actions[] = { + 1, 0, 0, 5, 0, 0, 0, 0, + 0, 0, 3, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0 +}; + +static const char _sav_long_variable_parse_eof_actions[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 8, 0, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8 }; static const int sav_long_variable_parse_start = 1; @@ -327,314 +282,323 @@ static const int sav_long_variable_parse_en_main = 1; readstat_error_t sav_parse_long_variable_names_record(void *data, int count, sav_ctx_t *ctx) { - unsigned char *c_data = (unsigned char *)data; - int var_count = count_vars(ctx); - readstat_error_t retval = READSTAT_OK; - - char temp_key[8+1]; - char temp_val[64+1]; - unsigned char *str_start = NULL; - size_t str_len = 0; - - char error_buf[8192]; - unsigned char *p = c_data; - unsigned char *pe = c_data + count; - - varlookup_t *table = build_lookup_table(var_count, ctx); - - unsigned char *eof = pe; - - int cs; - - -#line 351 "src/spss/readstat_sav_parse.c" + unsigned 
char *c_data = (unsigned char *)data; + int var_count = count_vars(ctx); + readstat_error_t retval = READSTAT_OK; + + char temp_key[8+1]; + char temp_val[64+1]; + unsigned char *str_start = NULL; + size_t str_len = 0; + + char error_buf[8192]; + unsigned char *p = c_data; + unsigned char *pe = c_data + count; + + varlookup_t *table = build_lookup_table(var_count, ctx); + + unsigned char *eof = pe; + + int cs; + + +#line 306 "src/spss/readstat_sav_parse.c" { - cs = (int)sav_long_variable_parse_start; + cs = sav_long_variable_parse_start; } - -#line 356 "src/spss/readstat_sav_parse.c" + +#line 311 "src/spss/readstat_sav_parse.c" { - int _klen; - unsigned int _trans = 0; - const unsigned char * _keys; - const signed char * _acts; - unsigned int _nacts; - _resume: {} - if ( p == pe && p != eof ) - goto _out; - if ( p == eof ) { - if ( _sav_long_variable_parse_eof_trans[cs] > 0 ) { - _trans = (unsigned int)_sav_long_variable_parse_eof_trans[cs] - 1; + int _klen; + unsigned int _trans; + const char *_acts; + unsigned int _nacts; + const unsigned char *_keys; + + if ( p == pe ) + goto _test_eof; + if ( cs == 0 ) + goto _out; +_resume: + _keys = _sav_long_variable_parse_trans_keys + _sav_long_variable_parse_key_offsets[cs]; + _trans = _sav_long_variable_parse_index_offsets[cs]; + + _klen = _sav_long_variable_parse_single_lengths[cs]; + if ( _klen > 0 ) { + const unsigned char *_lower = _keys; + const unsigned char *_mid; + const unsigned char *_upper = _keys + _klen - 1; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + ((_upper-_lower) >> 1); + if ( (*p) < *_mid ) + _upper = _mid - 1; + else if ( (*p) > *_mid ) + _lower = _mid + 1; + else { + _trans += (unsigned int)(_mid - _keys); + goto _match; } } - else { - _keys = ( _sav_long_variable_parse_trans_keys + (_sav_long_variable_parse_key_offsets[cs])); - _trans = (unsigned int)_sav_long_variable_parse_index_offsets[cs]; - - _klen = (int)_sav_long_variable_parse_single_lengths[cs]; - if ( _klen > 0 ) { - 
const unsigned char *_lower = _keys; - const unsigned char *_upper = _keys + _klen - 1; - const unsigned char *_mid; - while ( 1 ) { - if ( _upper < _lower ) { - _keys += _klen; - _trans += (unsigned int)_klen; - break; - } - - _mid = _lower + ((_upper-_lower) >> 1); - if ( ( (*( p))) < (*( _mid)) ) - _upper = _mid - 1; - else if ( ( (*( p))) > (*( _mid)) ) - _lower = _mid + 1; - else { - _trans += (unsigned int)(_mid - _keys); - goto _match; - } - } - } - - _klen = (int)_sav_long_variable_parse_range_lengths[cs]; - if ( _klen > 0 ) { - const unsigned char *_lower = _keys; - const unsigned char *_upper = _keys + (_klen<<1) - 2; - const unsigned char *_mid; - while ( 1 ) { - if ( _upper < _lower ) { - _trans += (unsigned int)_klen; - break; - } - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( ( (*( p))) < (*( _mid)) ) - _upper = _mid - 2; - else if ( ( (*( p))) > (*( _mid + 1)) ) - _lower = _mid + 2; - else { - _trans += (unsigned int)((_mid - _keys)>>1); - break; - } - } + _keys += _klen; + _trans += _klen; + } + + _klen = _sav_long_variable_parse_range_lengths[cs]; + if ( _klen > 0 ) { + const unsigned char *_lower = _keys; + const unsigned char *_mid; + const unsigned char *_upper = _keys + (_klen<<1) - 2; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( (*p) < _mid[0] ) + _upper = _mid - 2; + else if ( (*p) > _mid[1] ) + _lower = _mid + 2; + else { + _trans += (unsigned int)((_mid - _keys)>>1); + goto _match; } - - _match: {} } - cs = (int)_sav_long_variable_parse_cond_targs[_trans]; - - if ( _sav_long_variable_parse_cond_actions[_trans] != 0 ) { - - _acts = ( _sav_long_variable_parse_actions + (_sav_long_variable_parse_cond_actions[_trans])); - _nacts = (unsigned int)(*( _acts)); - _acts += 1; - while ( _nacts > 0 ) { - switch ( (*( _acts)) ) - { - case 0: { - { + _trans += _klen; + } + +_match: + _trans = _sav_long_variable_parse_indicies[_trans]; + cs = 
_sav_long_variable_parse_trans_targs[_trans]; + + if ( _sav_long_variable_parse_trans_actions[_trans] == 0 ) + goto _again; + + _acts = _sav_long_variable_parse_actions + _sav_long_variable_parse_trans_actions[_trans]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) + { + switch ( *_acts++ ) + { + case 0: #line 13 "src/spss/readstat_sav_parse.rl" - - memcpy(temp_key, str_start, str_len); - temp_key[str_len] = '\0'; - } - -#line 442 "src/spss/readstat_sav_parse.c" - - break; - } - case 1: { - { + { + memcpy(temp_key, str_start, str_len); + temp_key[str_len] = '\0'; + } + break; + case 1: #line 20 "src/spss/readstat_sav_parse.rl" - str_start = p; } - -#line 451 "src/spss/readstat_sav_parse.c" - - break; - } - case 2: { - { + { str_start = p; } + break; + case 2: #line 20 "src/spss/readstat_sav_parse.rl" - str_len = p - str_start; } - -#line 460 "src/spss/readstat_sav_parse.c" - - break; - } - case 3: { - { + { str_len = p - str_start; } + break; + case 3: #line 102 "src/spss/readstat_sav_parse.rl" - - varlookup_t *found = bsearch(temp_key, table, var_count, sizeof(varlookup_t), &compare_key_varlookup); - if (found) { - spss_varinfo_t *info = ctx->varinfo[found->index]; - memcpy(info->longname, temp_val, str_len); - info->longname[str_len] = '\0'; - } else if (ctx->handle.error) { - snprintf(error_buf, sizeof(error_buf), "Failed to find %s", temp_key); - ctx->handle.error(error_buf, ctx->user_ctx); - } - } - -#line 479 "src/spss/readstat_sav_parse.c" - - break; - } - case 4: { - { -#line 114 "src/spss/readstat_sav_parse.rl" - - memcpy(temp_val, str_start, str_len); - temp_val[str_len] = '\0'; - } - -#line 491 "src/spss/readstat_sav_parse.c" - - break; - } - case 5: { - { -#line 119 "src/spss/readstat_sav_parse.rl" - str_start = p; } - -#line 500 "src/spss/readstat_sav_parse.c" - - break; - } - case 6: { - { -#line 119 "src/spss/readstat_sav_parse.rl" - str_len = p - str_start; } - -#line 509 "src/spss/readstat_sav_parse.c" - - break; - } - } - _nacts -= 
1; - _acts += 1; - } - - } - - if ( p == eof ) { - if ( cs >= 11 ) - goto _out; - } - else { - if ( cs != 0 ) { - p += 1; - goto _resume; - } + { + varlookup_t *found = bsearch(temp_key, table, var_count, sizeof(varlookup_t), &compare_key_varlookup); + if (!found) { + snprintf(error_buf, sizeof(error_buf), "Failed to find %s", temp_key); + if (ctx->handle.error) + ctx->handle.error(error_buf, ctx->user_ctx); + } else { + // Handle the edge case where a ghost variable name (from a multi-segment + // variable) is identical to a real variable name. Normally we handle this + // by incrementing the loop variable by n_segments, but n_segments hasn't + // been set when this record is processed. So just set the longname to every + // matching variable, ghost or real. + varlookup_t *iter_match = found; + while (iter_match >= table && strcmp(iter_match->name, temp_key) == 0) { + spss_varinfo_t *info = ctx->varinfo[iter_match->index]; + snprintf(info->longname, sizeof(info->longname), "%*s", (int)str_len, temp_val); + iter_match--; + } + iter_match = found + 1; + while (iter_match - table < var_count && strcmp(iter_match->name, temp_key) == 0) { + spss_varinfo_t *info = ctx->varinfo[iter_match->index]; + snprintf(info->longname, sizeof(info->longname), "%*s", (int)str_len, temp_val); + iter_match++; + } + } + } + break; + case 4: +#line 129 "src/spss/readstat_sav_parse.rl" + { + memcpy(temp_val, str_start, str_len); + temp_val[str_len] = '\0'; + } + break; + case 5: +#line 134 "src/spss/readstat_sav_parse.rl" + { str_start = p; } + break; + case 6: +#line 134 "src/spss/readstat_sav_parse.rl" + { str_len = p - str_start; } + break; +#line 444 "src/spss/readstat_sav_parse.c" } - _out: {} } - -#line 127 "src/spss/readstat_sav_parse.rl" - - - if (cs < -#line 537 "src/spss/readstat_sav_parse.c" - 11 + +_again: + if ( cs == 0 ) + goto _out; + if ( ++p != pe ) + goto _resume; + _test_eof: {} + if ( p == eof ) + { + const char *__acts = _sav_long_variable_parse_actions + 
_sav_long_variable_parse_eof_actions[cs]; + unsigned int __nacts = (unsigned int) *__acts++; + while ( __nacts-- > 0 ) { + switch ( *__acts++ ) { + case 3: +#line 102 "src/spss/readstat_sav_parse.rl" + { + varlookup_t *found = bsearch(temp_key, table, var_count, sizeof(varlookup_t), &compare_key_varlookup); + if (!found) { + snprintf(error_buf, sizeof(error_buf), "Failed to find %s", temp_key); + if (ctx->handle.error) + ctx->handle.error(error_buf, ctx->user_ctx); + } else { + // Handle the edge case where a ghost variable name (from a multi-segment + // variable) is identical to a real variable name. Normally we handle this + // by incrementing the loop variable by n_segments, but n_segments hasn't + // been set when this record is processed. So just set the longname to every + // matching variable, ghost or real. + varlookup_t *iter_match = found; + while (iter_match >= table && strcmp(iter_match->name, temp_key) == 0) { + spss_varinfo_t *info = ctx->varinfo[iter_match->index]; + snprintf(info->longname, sizeof(info->longname), "%*s", (int)str_len, temp_val); + iter_match--; + } + iter_match = found + 1; + while (iter_match - table < var_count && strcmp(iter_match->name, temp_key) == 0) { + spss_varinfo_t *info = ctx->varinfo[iter_match->index]; + snprintf(info->longname, sizeof(info->longname), "%*s", (int)str_len, temp_val); + iter_match++; + } + } + } + break; + case 4: #line 129 "src/spss/readstat_sav_parse.rl" - || p != pe) { - if (ctx->handle.error) { - snprintf(error_buf, sizeof(error_buf), "Error parsing string \"%.*s\" around byte #%ld/%d, character %c", - count, (char *)data, (long)(p - c_data), count, *p); - ctx->handle.error(error_buf, ctx->user_ctx); + { + memcpy(temp_val, str_start, str_len); + temp_val[str_len] = '\0'; + } + break; + case 6: +#line 134 "src/spss/readstat_sav_parse.rl" + { str_len = p - str_start; } + break; +#line 500 "src/spss/readstat_sav_parse.c" } - retval = READSTAT_ERROR_PARSE; } - - - if (table) - free(table); - - /* 
suppress warning */ - (void)sav_long_variable_parse_en_main; - - return retval; + } + + _out: {} + } + +#line 142 "src/spss/readstat_sav_parse.rl" + + + if (cs < 11|| p != pe) { + if (ctx->handle.error) { + snprintf(error_buf, sizeof(error_buf), "Error parsing string \"%.*s\" around byte #%ld/%d, character %c", + count, (char *)data, (long)(p - c_data), count, *p); + ctx->handle.error(error_buf, ctx->user_ctx); + } + retval = READSTAT_ERROR_PARSE; + } + + + if (table) + free(table); + + /* suppress warning */ + (void)sav_long_variable_parse_en_main; + + return retval; } -#line 560 "src/spss/readstat_sav_parse.c" -static const signed char _sav_very_long_string_parse_actions[] = { - 0, 1, 1, 1, 3, 1, 4, 2, - 2, 0, 2, 5, 4, 0 +#line 531 "src/spss/readstat_sav_parse.c" +static const char _sav_very_long_string_parse_actions[] = { + 0, 1, 1, 1, 3, 1, 4, 2, + 2, 0, 2, 5, 4 }; -static const signed char _sav_very_long_string_parse_key_offsets[] = { - 0, 0, 5, 19, 33, 47, 61, 75, - 89, 103, 104, 106, 110, 112, 0 +static const char _sav_very_long_string_parse_key_offsets[] = { + 0, 0, 5, 19, 33, 47, 61, 75, + 89, 103, 104, 106, 110, 112 }; static const unsigned char _sav_very_long_string_parse_trans_keys[] = { - 255u, 0u, 63u, 91u, 127u, 47u, 61u, 96u, - 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, - 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, - 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, - 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, - 45u, 58u, 63u, 91u, 94u, 123u, 127u, 47u, - 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, - 63u, 91u, 94u, 123u, 127u, 47u, 61u, 96u, - 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, - 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, - 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, - 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, - 45u, 58u, 63u, 91u, 94u, 123u, 127u, 61u, - 48u, 57u, 0u, 9u, 48u, 57u, 0u, 9u, - 255u, 0u, 63u, 91u, 127u, 0u + 255u, 0u, 63u, 91u, 127u, 47u, 61u, 96u, + 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, + 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, + 34u, 37u, 45u, 58u, 
63u, 91u, 94u, 123u, + 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, + 45u, 58u, 63u, 91u, 94u, 123u, 127u, 47u, + 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, + 63u, 91u, 94u, 123u, 127u, 47u, 61u, 96u, + 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, + 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, + 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, + 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, + 45u, 58u, 63u, 91u, 94u, 123u, 127u, 61u, + 48u, 57u, 0u, 9u, 48u, 57u, 0u, 9u, + 255u, 0u, 63u, 91u, 127u, 0 }; -static const signed char _sav_very_long_string_parse_single_lengths[] = { - 0, 1, 4, 4, 4, 4, 4, 4, - 4, 1, 0, 2, 2, 1, 0 +static const char _sav_very_long_string_parse_single_lengths[] = { + 0, 1, 4, 4, 4, 4, 4, 4, + 4, 1, 0, 2, 2, 1 }; -static const signed char _sav_very_long_string_parse_range_lengths[] = { - 0, 2, 5, 5, 5, 5, 5, 5, - 5, 0, 1, 1, 0, 2, 0 +static const char _sav_very_long_string_parse_range_lengths[] = { + 0, 2, 5, 5, 5, 5, 5, 5, + 5, 0, 1, 1, 0, 2 }; -static const signed char _sav_very_long_string_parse_index_offsets[] = { - 0, 0, 4, 14, 24, 34, 44, 54, - 64, 74, 76, 78, 82, 85, 0 +static const char _sav_very_long_string_parse_index_offsets[] = { + 0, 0, 4, 14, 24, 34, 44, 54, + 64, 74, 76, 78, 82, 85 }; -static const signed char _sav_very_long_string_parse_cond_targs[] = { - 0, 0, 0, 2, 0, 10, 0, 0, - 0, 0, 0, 0, 0, 3, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 4, - 0, 10, 0, 0, 0, 0, 0, 0, - 0, 5, 0, 10, 0, 0, 0, 0, - 0, 0, 0, 6, 0, 10, 0, 0, - 0, 0, 0, 0, 0, 7, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 8, - 0, 10, 0, 0, 0, 0, 0, 0, - 0, 9, 10, 0, 11, 0, 12, 13, - 11, 0, 12, 13, 0, 0, 0, 0, - 2, 0, 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 0 +static const char _sav_very_long_string_parse_indicies[] = { + 1, 1, 1, 0, 1, 3, 1, 1, + 1, 1, 1, 1, 1, 2, 1, 3, + 1, 1, 1, 1, 1, 1, 1, 4, + 1, 3, 1, 1, 1, 1, 1, 1, + 1, 5, 1, 3, 1, 1, 1, 1, + 1, 1, 1, 6, 1, 3, 1, 1, + 1, 1, 1, 1, 1, 7, 1, 3, + 1, 1, 1, 1, 1, 1, 1, 8, + 1, 3, 1, 1, 1, 1, 1, 1, + 1, 9, 3, 1, 10, 1, 11, 12, + 13, 1, 14, 15, 1, 1, 1, 1, + 
0, 0 }; -static const signed char _sav_very_long_string_parse_cond_actions[] = { - 0, 0, 0, 1, 0, 7, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 7, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 7, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 7, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 7, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 7, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 7, 0, 0, 0, 0, 0, 0, - 0, 0, 7, 0, 10, 0, 3, 3, - 5, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 3, 0, 0, 0 +static const char _sav_very_long_string_parse_trans_targs[] = { + 2, 0, 3, 10, 4, 5, 6, 7, + 8, 9, 11, 12, 13, 11, 12, 13 }; -static const signed char _sav_very_long_string_parse_eof_trans[] = { - 90, 91, 92, 93, 94, 95, 96, 97, - 98, 99, 100, 101, 102, 103, 0 +static const char _sav_very_long_string_parse_trans_actions[] = { + 1, 0, 0, 7, 0, 0, 0, 0, + 0, 0, 10, 3, 3, 5, 0, 0 +}; + +static const char _sav_very_long_string_parse_eof_actions[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 3, 0, 0 }; static const int sav_very_long_string_parse_start = 1; @@ -642,231 +606,238 @@ static const int sav_very_long_string_parse_start = 1; static const int sav_very_long_string_parse_en_main = 1; -#line 153 "src/spss/readstat_sav_parse.rl" +#line 168 "src/spss/readstat_sav_parse.rl" readstat_error_t sav_parse_very_long_string_record(void *data, int count, sav_ctx_t *ctx) { - unsigned char *c_data = (unsigned char *)data; - int var_count = count_vars(ctx); - readstat_error_t retval = READSTAT_OK; - - char temp_key[8*4+1]; - unsigned int temp_val = 0; - unsigned char *str_start = NULL; - size_t str_len = 0; - - size_t error_buf_len = 1024 + count; - char *error_buf = NULL; - unsigned char *p = c_data; - unsigned char *pe = c_data + count; - unsigned char *eof = pe; - - varlookup_t *table = NULL; - int cs; - - error_buf = readstat_malloc(error_buf_len); - table = build_lookup_table(var_count, ctx); - - -#line 672 "src/spss/readstat_sav_parse.c" + unsigned char *c_data = (unsigned char *)data; + int var_count = count_vars(ctx); + readstat_error_t retval = READSTAT_OK; + + 
char temp_key[8*4+1]; + unsigned int temp_val = 0; + unsigned char *str_start = NULL; + size_t str_len = 0; + + size_t error_buf_len = 1024 + count; + char *error_buf = NULL; + unsigned char *p = c_data; + unsigned char *pe = c_data + count; + unsigned char *eof = pe; + + varlookup_t *table = NULL; + int cs; + + error_buf = readstat_malloc(error_buf_len); + table = build_lookup_table(var_count, ctx); + + +#line 636 "src/spss/readstat_sav_parse.c" { - cs = (int)sav_very_long_string_parse_start; + cs = sav_very_long_string_parse_start; } - -#line 677 "src/spss/readstat_sav_parse.c" + +#line 641 "src/spss/readstat_sav_parse.c" { - int _klen; - unsigned int _trans = 0; - const unsigned char * _keys; - const signed char * _acts; - unsigned int _nacts; - _resume: {} - if ( p == pe && p != eof ) - goto _out; - if ( p == eof ) { - if ( _sav_very_long_string_parse_eof_trans[cs] > 0 ) { - _trans = (unsigned int)_sav_very_long_string_parse_eof_trans[cs] - 1; + int _klen; + unsigned int _trans; + const char *_acts; + unsigned int _nacts; + const unsigned char *_keys; + + if ( p == pe ) + goto _test_eof; + if ( cs == 0 ) + goto _out; +_resume: + _keys = _sav_very_long_string_parse_trans_keys + _sav_very_long_string_parse_key_offsets[cs]; + _trans = _sav_very_long_string_parse_index_offsets[cs]; + + _klen = _sav_very_long_string_parse_single_lengths[cs]; + if ( _klen > 0 ) { + const unsigned char *_lower = _keys; + const unsigned char *_mid; + const unsigned char *_upper = _keys + _klen - 1; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + ((_upper-_lower) >> 1); + if ( (*p) < *_mid ) + _upper = _mid - 1; + else if ( (*p) > *_mid ) + _lower = _mid + 1; + else { + _trans += (unsigned int)(_mid - _keys); + goto _match; } } - else { - _keys = ( _sav_very_long_string_parse_trans_keys + (_sav_very_long_string_parse_key_offsets[cs])); - _trans = (unsigned int)_sav_very_long_string_parse_index_offsets[cs]; - - _klen = 
(int)_sav_very_long_string_parse_single_lengths[cs]; - if ( _klen > 0 ) { - const unsigned char *_lower = _keys; - const unsigned char *_upper = _keys + _klen - 1; - const unsigned char *_mid; - while ( 1 ) { - if ( _upper < _lower ) { - _keys += _klen; - _trans += (unsigned int)_klen; - break; - } - - _mid = _lower + ((_upper-_lower) >> 1); - if ( ( (*( p))) < (*( _mid)) ) - _upper = _mid - 1; - else if ( ( (*( p))) > (*( _mid)) ) - _lower = _mid + 1; - else { - _trans += (unsigned int)(_mid - _keys); - goto _match; - } - } - } - - _klen = (int)_sav_very_long_string_parse_range_lengths[cs]; - if ( _klen > 0 ) { - const unsigned char *_lower = _keys; - const unsigned char *_upper = _keys + (_klen<<1) - 2; - const unsigned char *_mid; - while ( 1 ) { - if ( _upper < _lower ) { - _trans += (unsigned int)_klen; - break; - } - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( ( (*( p))) < (*( _mid)) ) - _upper = _mid - 2; - else if ( ( (*( p))) > (*( _mid + 1)) ) - _lower = _mid + 2; - else { - _trans += (unsigned int)((_mid - _keys)>>1); - break; - } - } + _keys += _klen; + _trans += _klen; + } + + _klen = _sav_very_long_string_parse_range_lengths[cs]; + if ( _klen > 0 ) { + const unsigned char *_lower = _keys; + const unsigned char *_mid; + const unsigned char *_upper = _keys + (_klen<<1) - 2; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( (*p) < _mid[0] ) + _upper = _mid - 2; + else if ( (*p) > _mid[1] ) + _lower = _mid + 2; + else { + _trans += (unsigned int)((_mid - _keys)>>1); + goto _match; } - - _match: {} } - cs = (int)_sav_very_long_string_parse_cond_targs[_trans]; - - if ( _sav_very_long_string_parse_cond_actions[_trans] != 0 ) { - - _acts = ( _sav_very_long_string_parse_actions + (_sav_very_long_string_parse_cond_actions[_trans])); - _nacts = (unsigned int)(*( _acts)); - _acts += 1; - while ( _nacts > 0 ) { - switch ( (*( _acts)) ) - { - case 0: { - { + _trans += _klen; + } + +_match: + _trans 
= _sav_very_long_string_parse_indicies[_trans]; + cs = _sav_very_long_string_parse_trans_targs[_trans]; + + if ( _sav_very_long_string_parse_trans_actions[_trans] == 0 ) + goto _again; + + _acts = _sav_very_long_string_parse_actions + _sav_very_long_string_parse_trans_actions[_trans]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) + { + switch ( *_acts++ ) + { + case 0: #line 13 "src/spss/readstat_sav_parse.rl" - - memcpy(temp_key, str_start, str_len); - temp_key[str_len] = '\0'; - } - -#line 763 "src/spss/readstat_sav_parse.c" - - break; - } - case 1: { - { + { + memcpy(temp_key, str_start, str_len); + temp_key[str_len] = '\0'; + } + break; + case 1: #line 20 "src/spss/readstat_sav_parse.rl" - str_start = p; } - -#line 772 "src/spss/readstat_sav_parse.c" - - break; - } - case 2: { - { + { str_start = p; } + break; + case 2: #line 20 "src/spss/readstat_sav_parse.rl" - str_len = p - str_start; } - -#line 781 "src/spss/readstat_sav_parse.c" - - break; - } - case 3: { - { -#line 178 "src/spss/readstat_sav_parse.rl" - - varlookup_t *found = bsearch(temp_key, table, var_count, sizeof(varlookup_t), &compare_key_varlookup); - if (found) { - ctx->varinfo[found->index]->string_length = temp_val; - ctx->varinfo[found->index]->write_format.width = temp_val; - ctx->varinfo[found->index]->print_format.width = temp_val; - } - } - -#line 797 "src/spss/readstat_sav_parse.c" - - break; - } - case 4: { - { -#line 187 "src/spss/readstat_sav_parse.rl" - - if ((( (*( p)))) != '\0') { - unsigned char digit = (( (*( p)))) - '0'; - if (temp_val <= (UINT_MAX - digit) / 10) { - temp_val = 10 * temp_val + digit; - } else { - {p += 1; goto _out; } - } - } - } - -#line 815 "src/spss/readstat_sav_parse.c" - - break; - } - case 5: { - { -#line 198 "src/spss/readstat_sav_parse.rl" - temp_val = 0; } - -#line 824 "src/spss/readstat_sav_parse.c" - - break; - } - } - _nacts -= 1; - _acts += 1; - } - - } - - if ( p == eof ) { - if ( cs >= 11 ) - goto _out; - } - else { - if ( cs != 0 ) { 
- p += 1; - goto _resume; - } + { str_len = p - str_start; } + break; + case 3: +#line 193 "src/spss/readstat_sav_parse.rl" + { + varlookup_t *found = bsearch(temp_key, table, var_count, sizeof(varlookup_t), &compare_key_varlookup); + if (found) { + // See logic above; we need to apply this to all matching variables since ghost variable + // names may conflict with real variable names. + varlookup_t *first_match = found, *last_match = found; + varlookup_t *iter_match = found - 1; + while (iter_match >= table && strcmp(iter_match->name, temp_key) == 0) { + first_match = iter_match; + iter_match--; + } + iter_match = found + 1; + while (iter_match - table < var_count && strcmp(iter_match->name, temp_key) == 0) { + last_match = iter_match; + iter_match++; + } + for (iter_match=first_match; iter_match<=last_match; iter_match++) { + ctx->varinfo[iter_match->index]->string_length = temp_val; + ctx->varinfo[iter_match->index]->write_format.width = temp_val; + ctx->varinfo[iter_match->index]->print_format.width = temp_val; + } + } + } + break; + case 4: +#line 217 "src/spss/readstat_sav_parse.rl" + { + if ((*p) != '\0') { + unsigned char digit = (*p) - '0'; + if (temp_val <= (UINT_MAX - digit) / 10) { + temp_val = 10 * temp_val + digit; + } else { + {p++; goto _out; } + } + } + } + break; + case 5: +#line 228 "src/spss/readstat_sav_parse.rl" + { temp_val = 0; } + break; +#line 773 "src/spss/readstat_sav_parse.c" } - _out: {} } - -#line 206 "src/spss/readstat_sav_parse.rl" - - - if (cs < -#line 852 "src/spss/readstat_sav_parse.c" - 11 -#line 208 "src/spss/readstat_sav_parse.rl" - || p != pe) { - if (ctx->handle.error) { - snprintf(error_buf, error_buf_len, "Parsed %ld of %ld bytes. 
Remaining bytes: %.*s", - (long)(p - c_data), (long)(pe - c_data), (int)(pe - p), p); - ctx->handle.error(error_buf, ctx->user_ctx); + +_again: + if ( cs == 0 ) + goto _out; + if ( ++p != pe ) + goto _resume; + _test_eof: {} + if ( p == eof ) + { + const char *__acts = _sav_very_long_string_parse_actions + _sav_very_long_string_parse_eof_actions[cs]; + unsigned int __nacts = (unsigned int) *__acts++; + while ( __nacts-- > 0 ) { + switch ( *__acts++ ) { + case 3: +#line 193 "src/spss/readstat_sav_parse.rl" + { + varlookup_t *found = bsearch(temp_key, table, var_count, sizeof(varlookup_t), &compare_key_varlookup); + if (found) { + // See logic above; we need to apply this to all matching variables since ghost variable + // names may conflict with real variable names. + varlookup_t *first_match = found, *last_match = found; + varlookup_t *iter_match = found - 1; + while (iter_match >= table && strcmp(iter_match->name, temp_key) == 0) { + first_match = iter_match; + iter_match--; + } + iter_match = found + 1; + while (iter_match - table < var_count && strcmp(iter_match->name, temp_key) == 0) { + last_match = iter_match; + iter_match++; + } + for (iter_match=first_match; iter_match<=last_match; iter_match++) { + ctx->varinfo[iter_match->index]->string_length = temp_val; + ctx->varinfo[iter_match->index]->write_format.width = temp_val; + ctx->varinfo[iter_match->index]->print_format.width = temp_val; + } + } + } + break; +#line 815 "src/spss/readstat_sav_parse.c" } - retval = READSTAT_ERROR_PARSE; } - - if (table) - free(table); - if (error_buf) - free(error_buf); - - /* suppress warning */ - (void)sav_very_long_string_parse_en_main; - - return retval; + } + + _out: {} + } + +#line 236 "src/spss/readstat_sav_parse.rl" + + + if (cs < 11 || p != pe) { + if (ctx->handle.error) { + snprintf(error_buf, error_buf_len, "Parsed %ld of %ld bytes. 
Remaining bytes: %.*s", + (long)(p - c_data), (long)(pe - c_data), (int)(pe - p), p); + ctx->handle.error(error_buf, ctx->user_ctx); + } + retval = READSTAT_ERROR_PARSE; + } + + if (table) + free(table); + if (error_buf) + free(error_buf); + + /* suppress warning */ + (void)sav_very_long_string_parse_en_main; + + return retval; } diff --git a/src/readstat/spss/readstat_sav_parse.rl b/src/readstat/spss/readstat_sav_parse.rl index 1d98087f..947cb48a 100644 --- a/src/readstat/spss/readstat_sav_parse.rl +++ b/src/readstat/spss/readstat_sav_parse.rl @@ -101,13 +101,28 @@ readstat_error_t sav_parse_long_variable_names_record(void *data, int count, sav %%{ action set_long_name { varlookup_t *found = bsearch(temp_key, table, var_count, sizeof(varlookup_t), &compare_key_varlookup); - if (found) { - spss_varinfo_t *info = ctx->varinfo[found->index]; - memcpy(info->longname, temp_val, str_len); - info->longname[str_len] = '\0'; - } else if (ctx->handle.error) { + if (!found) { snprintf(error_buf, sizeof(error_buf), "Failed to find %s", temp_key); - ctx->handle.error(error_buf, ctx->user_ctx); + if (ctx->handle.error) + ctx->handle.error(error_buf, ctx->user_ctx); + } else { + // Handle the edge case where a ghost variable name (from a multi-segment + // variable) is identical to a real variable name. Normally we handle this + // by incrementing the loop variable by n_segments, but n_segments hasn't + // been set when this record is processed. So just set the longname to every + // matching variable, ghost or real. 
+ varlookup_t *iter_match = found; + while (iter_match >= table && strcmp(iter_match->name, temp_key) == 0) { + spss_varinfo_t *info = ctx->varinfo[iter_match->index]; + snprintf(info->longname, sizeof(info->longname), "%*s", (int)str_len, temp_val); + iter_match--; + } + iter_match = found + 1; + while (iter_match - table < var_count && strcmp(iter_match->name, temp_key) == 0) { + spss_varinfo_t *info = ctx->varinfo[iter_match->index]; + snprintf(info->longname, sizeof(info->longname), "%*s", (int)str_len, temp_val); + iter_match++; + } } } @@ -178,9 +193,24 @@ readstat_error_t sav_parse_very_long_string_record(void *data, int count, sav_ct action set_width { varlookup_t *found = bsearch(temp_key, table, var_count, sizeof(varlookup_t), &compare_key_varlookup); if (found) { - ctx->varinfo[found->index]->string_length = temp_val; - ctx->varinfo[found->index]->write_format.width = temp_val; - ctx->varinfo[found->index]->print_format.width = temp_val; + // See logic above; we need to apply this to all matching variables since ghost variable + // names may conflict with real variable names. 
+ varlookup_t *first_match = found, *last_match = found; + varlookup_t *iter_match = found - 1; + while (iter_match >= table && strcmp(iter_match->name, temp_key) == 0) { + first_match = iter_match; + iter_match--; + } + iter_match = found + 1; + while (iter_match - table < var_count && strcmp(iter_match->name, temp_key) == 0) { + last_match = iter_match; + iter_match++; + } + for (iter_match=first_match; iter_match<=last_match; iter_match++) { + ctx->varinfo[iter_match->index]->string_length = temp_val; + ctx->varinfo[iter_match->index]->write_format.width = temp_val; + ctx->varinfo[iter_match->index]->print_format.width = temp_val; + } } } diff --git a/src/readstat/spss/readstat_sav_parse_mr_name.c b/src/readstat/spss/readstat_sav_parse_mr_name.c new file mode 100644 index 00000000..4bfad0c1 --- /dev/null +++ b/src/readstat/spss/readstat_sav_parse_mr_name.c @@ -0,0 +1,546 @@ +#line 1 "src/spss/readstat_sav_parse_mr_name.rl" +#include +#include +#include +#include "../readstat.h" +#include "../readstat_malloc.h" + + +#line 8 "src/spss/readstat_sav_parse_mr_name.c" +static const signed char _mr_extractor_actions[] = { + 0, 1, 0, 1, 1, 1, 2, 1, + 3, 1, 4, 0 +}; + +static const signed char _mr_extractor_key_offsets[] = { + 0, 0, 8, 17, 19, 22, 24, 27, + 36, 48, 0 +}; + +static const char _mr_extractor_trans_keys[] = { + 46, 95, 48, 57, 65, 90, 97, 122, + 46, 61, 95, 48, 57, 65, 90, 97, + 122, 67, 68, 32, 48, 57, 48, 57, + 32, 48, 57, 32, 46, 95, 48, 57, + 65, 90, 97, 122, 0, 32, 46, 95, + 9, 13, 48, 57, 65, 90, 97, 122, + 46, 95, 48, 57, 65, 90, 97, 122, + 0 +}; + +static const signed char _mr_extractor_single_lengths[] = { + 0, 2, 3, 0, 1, 0, 1, 3, + 4, 2, 0 +}; + +static const signed char _mr_extractor_range_lengths[] = { + 0, 3, 3, 1, 1, 1, 1, 3, + 4, 3, 0 +}; + +static const signed char _mr_extractor_index_offsets[] = { + 0, 0, 6, 13, 15, 18, 20, 23, + 30, 39, 0 +}; + +static const signed char _mr_extractor_cond_targs[] = { + 2, 2, 2, 2, 2, 0, 2, 3, + 2, 2, 2, 2, 
0, 4, 0, 5, + 4, 0, 6, 0, 7, 6, 0, 7, + 8, 8, 8, 8, 8, 0, 9, 9, + 8, 8, 9, 8, 8, 8, 0, 8, + 8, 8, 8, 8, 0, 0, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 0 +}; + +static const signed char _mr_extractor_cond_actions[] = { + 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 3, 0, 5, + 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 9, 9, + 0, 0, 9, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static const int mr_extractor_start = 1; + +static const int mr_extractor_en_main = 1; + + +#line 107 "src/spss/readstat_sav_parse_mr_name.rl" + + +readstat_error_t extract_mr_data(const char *line, mr_set_t *result) { + readstat_error_t retval = READSTAT_OK; + + // Variables needed for Ragel operation + int cs = 0; + char *p = (char *)line; + char *start = p; + char *pe = p + strlen(p) + 1; + + // Variables needed for passing Ragel intermediate results + char mr_type = '\0'; + int mr_counted_value = -1; + int mr_subvar_count = 0; + char **mr_subvariables = NULL; + char *mr_name = NULL; + char *mr_label = NULL; + + // Execute Ragel finite state machine (FSM) + +#line 89 "src/spss/readstat_sav_parse_mr_name.c" + { + cs = (int)mr_extractor_start; + } + +#line 127 "src/spss/readstat_sav_parse_mr_name.rl" + + +#line 94 "src/spss/readstat_sav_parse_mr_name.c" + { + int _klen; + unsigned int _trans = 0; + const char * _keys; + const signed char * _acts; + unsigned int _nacts; + _resume: {} + if ( p == pe ) + goto _out; + _keys = ( _mr_extractor_trans_keys + (_mr_extractor_key_offsets[cs])); + _trans = (unsigned int)_mr_extractor_index_offsets[cs]; + + _klen = (int)_mr_extractor_single_lengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_upper = _keys + _klen - 1; + const char *_mid; + while ( 1 ) { + if ( _upper < _lower ) { + _keys += _klen; + _trans += (unsigned int)_klen; + break; + } + + _mid = _lower + ((_upper-_lower) >> 1); + if ( ( (*( p))) < (*( _mid)) ) + _upper = _mid - 1; + else if ( ( (*( p))) > (*( _mid)) ) + _lower = _mid + 1; + else { + _trans 
+= (unsigned int)(_mid - _keys); + goto _match; + } + } + } + + _klen = (int)_mr_extractor_range_lengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_upper = _keys + (_klen<<1) - 2; + const char *_mid; + while ( 1 ) { + if ( _upper < _lower ) { + _trans += (unsigned int)_klen; + break; + } + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( ( (*( p))) < (*( _mid)) ) + _upper = _mid - 2; + else if ( ( (*( p))) > (*( _mid + 1)) ) + _lower = _mid + 2; + else { + _trans += (unsigned int)((_mid - _keys)>>1); + break; + } + } + } + + _match: {} + cs = (int)_mr_extractor_cond_targs[_trans]; + + if ( _mr_extractor_cond_actions[_trans] != 0 ) { + + _acts = ( _mr_extractor_actions + (_mr_extractor_cond_actions[_trans])); + _nacts = (unsigned int)(*( _acts)); + _acts += 1; + while ( _nacts > 0 ) { + switch ( (*( _acts)) ) + { + case 0: { + { +#line 10 "src/spss/readstat_sav_parse_mr_name.rl" + + mr_name = (char *)readstat_malloc(p - start + 1); + if (mr_name == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(mr_name, start, p - start); + mr_name[p - start] = '\0'; + } + +#line 177 "src/spss/readstat_sav_parse_mr_name.c" + + break; + } + case 1: { + { +#line 20 "src/spss/readstat_sav_parse_mr_name.rl" + + mr_type = *p; + start = p + 1; + } + +#line 188 "src/spss/readstat_sav_parse_mr_name.c" + + break; + } + case 2: { + { +#line 25 "src/spss/readstat_sav_parse_mr_name.rl" + + int n_cv_digs = p - start; + char *n_dig_str = (char *)readstat_malloc(n_cv_digs + 1); + if (n_dig_str == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(n_dig_str, start, n_cv_digs); + n_dig_str[n_cv_digs] = '\0'; + int n_digs = strtol(n_dig_str, NULL, 10); + free(n_dig_str); + if (n_digs != 0) { + char *cv = (char *)readstat_malloc(n_digs + 1); + if (cv == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(cv, p + 1, n_digs); + cv[n_digs] = '\0'; + mr_counted_value = strtol(cv, NULL, 10); + free(cv); + p = 
p + 1 + n_digs; + start = p + 1; + } + else { + mr_counted_value = -1; + } + } + +#line 223 "src/spss/readstat_sav_parse_mr_name.c" + + break; + } + case 3: { + { +#line 54 "src/spss/readstat_sav_parse_mr_name.rl" + + char *lbl_len_str = (char *)readstat_malloc(p - start + 1); + if (lbl_len_str == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(lbl_len_str, start, p - start); + lbl_len_str[p - start] = '\0'; + int len = strtol(lbl_len_str, NULL, 10); + free(lbl_len_str); + mr_label = (char *)readstat_malloc(len + 1); + if (mr_label == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(mr_label, p + 1, len); + mr_label[len] = '\0'; + p = p + 1 + len; + start = p + 1; + } + +#line 250 "src/spss/readstat_sav_parse_mr_name.c" + + break; + } + case 4: { + { +#line 75 "src/spss/readstat_sav_parse_mr_name.rl" + + int len = p - start; + char *subvar = (char *)readstat_malloc(len + 1); + if (subvar == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(subvar, start, len); + subvar[len] = '\0'; + start = p + 1; + char **new_subvariables = readstat_realloc(mr_subvariables, sizeof(char *) * (mr_subvar_count + 1)); + if (new_subvariables == NULL) { + free(subvar); + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + mr_subvariables = new_subvariables; + mr_subvariables[mr_subvar_count++] = subvar; + } + +#line 276 "src/spss/readstat_sav_parse_mr_name.c" + + break; + } + } + _nacts -= 1; + _acts += 1; + } + + } + + if ( cs != 0 ) { + p += 1; + goto _resume; + } + _out: {} + } + +#line 128 "src/spss/readstat_sav_parse_mr_name.rl" + + + // Check if FSM finished successfully + if (cs < +#line 296 "src/spss/readstat_sav_parse_mr_name.c" +9 +#line 131 "src/spss/readstat_sav_parse_mr_name.rl" + || p != pe) { + retval = READSTAT_ERROR_BAD_MR_STRING; + goto cleanup; + } + + (void)mr_extractor_en_main; + + // Assign parsed values to output parameter + result->name = mr_name; + result->label = mr_label; + result->type = 
mr_type; + result->counted_value = mr_counted_value; + result->subvariables = mr_subvariables; + result->num_subvars = mr_subvar_count; + if (result->type == 'D') { + result->is_dichotomy = 1; + } + + cleanup: + if (retval != READSTAT_OK) { + if (mr_subvariables != NULL) { + for (int i = 0; i < mr_subvar_count; i++) { + if (mr_subvariables[i] != NULL) free(mr_subvariables[i]); + } + free(mr_subvariables); + } + if (mr_name != NULL) free(mr_name); + if (mr_label != NULL) free(mr_label); + } + return retval; +} + + +readstat_error_t parse_mr_line(const char *line, mr_set_t *result) { + *result = (mr_set_t){0}; + return extract_mr_data(line, result); +} + + +#line 335 "src/spss/readstat_sav_parse_mr_name.c" +static const signed char _mr_parser_actions[] = { + 0, 1, 0, 0 +}; + +static const signed char _mr_parser_key_offsets[] = { + 0, 0, 1, 2, 4, 0 +}; + +static const char _mr_parser_trans_keys[] = { + 36, 10, 0, 10, 10, 0 +}; + +static const signed char _mr_parser_single_lengths[] = { + 0, 1, 1, 2, 1, 0 +}; + +static const signed char _mr_parser_range_lengths[] = { + 0, 0, 0, 0, 0, 0 +}; + +static const signed char _mr_parser_index_offsets[] = { + 0, 0, 2, 4, 7, 0 +}; + +static const signed char _mr_parser_cond_targs[] = { + 2, 0, 3, 2, 4, 3, 2, 3, + 2, 0, 1, 2, 3, 4, 0 +}; + +static const signed char _mr_parser_cond_actions[] = { + 0, 0, 1, 0, 0, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 0 +}; + +static const int mr_parser_start = 1; + +static const int mr_parser_en_main = 1; + + +#line 202 "src/spss/readstat_sav_parse_mr_name.rl" + + +readstat_error_t parse_mr_string(const char *line, mr_set_t **mr_sets, size_t *n_mr_lines) { + readstat_error_t retval = READSTAT_OK; + int cs = 0; + char *p = (char *)line; + char *start = p; + char *pe = p + strlen(p) + 1; + *mr_sets = NULL; + *n_mr_lines = 0; + + +#line 385 "src/spss/readstat_sav_parse_mr_name.c" + { + cs = (int)mr_parser_start; + } + +#line 213 "src/spss/readstat_sav_parse_mr_name.rl" + + +#line 390 
"src/spss/readstat_sav_parse_mr_name.c" + { + int _klen; + unsigned int _trans = 0; + const char * _keys; + const signed char * _acts; + unsigned int _nacts; + _resume: {} + if ( p == pe ) + goto _out; + _keys = ( _mr_parser_trans_keys + (_mr_parser_key_offsets[cs])); + _trans = (unsigned int)_mr_parser_index_offsets[cs]; + + _klen = (int)_mr_parser_single_lengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_upper = _keys + _klen - 1; + const char *_mid; + while ( 1 ) { + if ( _upper < _lower ) { + _keys += _klen; + _trans += (unsigned int)_klen; + break; + } + + _mid = _lower + ((_upper-_lower) >> 1); + if ( ( (*( p))) < (*( _mid)) ) + _upper = _mid - 1; + else if ( ( (*( p))) > (*( _mid)) ) + _lower = _mid + 1; + else { + _trans += (unsigned int)(_mid - _keys); + goto _match; + } + } + } + + _klen = (int)_mr_parser_range_lengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_upper = _keys + (_klen<<1) - 2; + const char *_mid; + while ( 1 ) { + if ( _upper < _lower ) { + _trans += (unsigned int)_klen; + break; + } + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( ( (*( p))) < (*( _mid)) ) + _upper = _mid - 2; + else if ( ( (*( p))) > (*( _mid + 1)) ) + _lower = _mid + 2; + else { + _trans += (unsigned int)((_mid - _keys)>>1); + break; + } + } + } + + _match: {} + cs = (int)_mr_parser_cond_targs[_trans]; + + if ( _mr_parser_cond_actions[_trans] != 0 ) { + + _acts = ( _mr_parser_actions + (_mr_parser_cond_actions[_trans])); + _nacts = (unsigned int)(*( _acts)); + _acts += 1; + while ( _nacts > 0 ) { + switch ( (*( _acts)) ) + { + case 0: { + { +#line 172 "src/spss/readstat_sav_parse_mr_name.rl" + + char *mln = (char *)readstat_malloc(p - start); + if (mln == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(mln, start + 1, p - start); + mln[p - start - 1] = '\0'; + mr_set_t *new_mr_sets = readstat_realloc(*mr_sets, ((*n_mr_lines) + 1) * sizeof(mr_set_t)); + if (new_mr_sets == NULL) { + 
free(mln); + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + *mr_sets = new_mr_sets; + retval = parse_mr_line(mln, &(*mr_sets)[*n_mr_lines]); + free(mln); + if (retval != READSTAT_OK) { + goto cleanup; + } + (*n_mr_lines)++; + start = p + 1; + } + +#line 487 "src/spss/readstat_sav_parse_mr_name.c" + + break; + } + } + _nacts -= 1; + _acts += 1; + } + + } + + if ( cs != 0 ) { + p += 1; + goto _resume; + } + _out: {} + } + +#line 214 "src/spss/readstat_sav_parse_mr_name.rl" + + + if (cs < +#line 506 "src/spss/readstat_sav_parse_mr_name.c" +4 +#line 216 "src/spss/readstat_sav_parse_mr_name.rl" + || p != pe) { + retval = READSTAT_ERROR_BAD_MR_STRING; + goto cleanup; + } + + (void)mr_parser_en_main; + + cleanup: + return retval; +} diff --git a/src/readstat/spss/readstat_sav_parse_mr_name.h b/src/readstat/spss/readstat_sav_parse_mr_name.h new file mode 100644 index 00000000..39752161 --- /dev/null +++ b/src/readstat/spss/readstat_sav_parse_mr_name.h @@ -0,0 +1,9 @@ +#ifndef READSTAT_PARSE_MR_NAME_H +#define READSTAT_PARSE_MR_NAME_H + +#include "../readstat.h" +#include "../readstat_malloc.h" + +readstat_error_t parse_mr_string(const char *line, mr_set_t **mr_sets, size_t *n_mr_lines); + +#endif // READSTAT_PARSE_MR_NAME_H diff --git a/src/readstat/spss/readstat_sav_parse_mr_name.rl b/src/readstat/spss/readstat_sav_parse_mr_name.rl new file mode 100644 index 00000000..817638bf --- /dev/null +++ b/src/readstat/spss/readstat_sav_parse_mr_name.rl @@ -0,0 +1,225 @@ +#include +#include +#include +#include "../readstat.h" +#include "../readstat_malloc.h" + +%%{ + machine mr_extractor; + + action extract_mr_name { + mr_name = (char *)readstat_malloc(p - start + 1); + if (mr_name == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(mr_name, start, p - start); + mr_name[p - start] = '\0'; + } + + action extract_mr_type { + mr_type = *p; + start = p + 1; + } + + action extract_counted_value { + int n_cv_digs = p - start; + char *n_dig_str = (char 
*)readstat_malloc(n_cv_digs + 1); + if (n_dig_str == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(n_dig_str, start, n_cv_digs); + n_dig_str[n_cv_digs] = '\0'; + int n_digs = strtol(n_dig_str, NULL, 10); + free(n_dig_str); + if (n_digs != 0) { + char *cv = (char *)readstat_malloc(n_digs + 1); + if (cv == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(cv, p + 1, n_digs); + cv[n_digs] = '\0'; + mr_counted_value = strtol(cv, NULL, 10); + free(cv); + p = p + 1 + n_digs; + start = p + 1; + } + else { + mr_counted_value = -1; + } + } + + action extract_label { + char *lbl_len_str = (char *)readstat_malloc(p - start + 1); + if (lbl_len_str == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(lbl_len_str, start, p - start); + lbl_len_str[p - start] = '\0'; + int len = strtol(lbl_len_str, NULL, 10); + free(lbl_len_str); + mr_label = (char *)readstat_malloc(len + 1); + if (mr_label == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(mr_label, p + 1, len); + mr_label[len] = '\0'; + p = p + 1 + len; + start = p + 1; + } + + action extract_subvar { + int len = p - start; + char *subvar = (char *)readstat_malloc(len + 1); + if (subvar == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(subvar, start, len); + subvar[len] = '\0'; + start = p + 1; + char **new_subvariables = readstat_realloc(mr_subvariables, sizeof(char *) * (mr_subvar_count + 1)); + if (new_subvariables == NULL) { + free(subvar); + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + mr_subvariables = new_subvariables; + mr_subvariables[mr_subvar_count++] = subvar; + } + + nc = (alnum | '_' | '.' 
); # name character (including dots) + name = nc+ '=' > extract_mr_name; + type = ('C' | 'D'){1} > extract_mr_type; + counted_value = digit* ' ' > extract_counted_value; + label = digit+ ' '+ > extract_label; + + end = (space | '\0'); # subvar token terminator + subvariable = (nc+ end >extract_subvar); + + main := name type counted_value label subvariable+; + + write data nofinal noerror; +}%% + +readstat_error_t extract_mr_data(const char *line, mr_set_t *result) { + readstat_error_t retval = READSTAT_OK; + + // Variables needed for Ragel operation + int cs = 0; + char *p = (char *)line; + char *start = p; + char *pe = p + strlen(p) + 1; + + // Variables needed for passing Ragel intermediate results + char mr_type = '\0'; + int mr_counted_value = -1; + int mr_subvar_count = 0; + char **mr_subvariables = NULL; + char *mr_name = NULL; + char *mr_label = NULL; + + // Execute Ragel finite state machine (FSM) + %% write init; + %% write exec; + + // Check if FSM finished successfully + if (cs < %%{ write first_final; }%% || p != pe) { + retval = READSTAT_ERROR_BAD_MR_STRING; + goto cleanup; + } + + (void)mr_extractor_en_main; + + // Assign parsed values to output parameter + result->name = mr_name; + result->label = mr_label; + result->type = mr_type; + result->counted_value = mr_counted_value; + result->subvariables = mr_subvariables; + result->num_subvars = mr_subvar_count; + if (result->type == 'D') { + result->is_dichotomy = 1; + } + +cleanup: + if (retval != READSTAT_OK) { + if (mr_subvariables != NULL) { + for (int i = 0; i < mr_subvar_count; i++) { + if (mr_subvariables[i] != NULL) free(mr_subvariables[i]); + } + free(mr_subvariables); + } + if (mr_name != NULL) free(mr_name); + if (mr_label != NULL) free(mr_label); + } + return retval; +} + + +readstat_error_t parse_mr_line(const char *line, mr_set_t *result) { + *result = (mr_set_t){0}; + return extract_mr_data(line, result); +} + +%%{ + machine mr_parser; + + action mr_line { + char *mln = (char 
*)readstat_malloc(p - start); + if (mln == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(mln, start + 1, p - start); + mln[p - start - 1] = '\0'; + mr_set_t *new_mr_sets = readstat_realloc(*mr_sets, ((*n_mr_lines) + 1) * sizeof(mr_set_t)); + if (new_mr_sets == NULL) { + free(mln); + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + *mr_sets = new_mr_sets; + retval = parse_mr_line(mln, &(*mr_sets)[*n_mr_lines]); + free(mln); + if (retval != READSTAT_OK) { + goto cleanup; + } + (*n_mr_lines)++; + start = p + 1; + } + line_start = '$'; + line_end = '\n'; + line_char = any - (line_end + line_start); + mr_line = line_start line_char* line_end > mr_line; + main := mr_line+ '\0'; + + write data nofinal noerror; +}%% + +readstat_error_t parse_mr_string(const char *line, mr_set_t **mr_sets, size_t *n_mr_lines) { + readstat_error_t retval = READSTAT_OK; + int cs = 0; + char *p = (char *)line; + char *start = p; + char *pe = p + strlen(p) + 1; + *mr_sets = NULL; + *n_mr_lines = 0; + + %% write init; + %% write exec; + + if (cs < %%{ write first_final; }%% || p != pe) { + retval = READSTAT_ERROR_BAD_MR_STRING; + goto cleanup; + } + + (void)mr_parser_en_main; + +cleanup: + return retval; +} diff --git a/src/readstat/spss/readstat_sav_read.c b/src/readstat/spss/readstat_sav_read.c index 7f494904..731d8add 100644 --- a/src/readstat/spss/readstat_sav_read.c +++ b/src/readstat/spss/readstat_sav_read.c @@ -1,24 +1,26 @@ #include #include -#include #include #include #include #include #include #include +#include #include "../readstat.h" #include "../readstat_bits.h" #include "../readstat_iconv.h" #include "../readstat_convert.h" #include "../readstat_malloc.h" +#include "../CKHashTable.h" #include "readstat_sav.h" #include "readstat_sav_compress.h" #include "readstat_sav_parse.h" #include "readstat_sav_parse_timestamp.h" +#include "readstat_sav_parse_mr_name.h" #if HAVE_ZLIB #include "readstat_zsav_read.h" @@ -145,6 +147,32 @@ static readstat_error_t 
sav_parse_variable_display_parameter_record(sav_ctx_t *c static readstat_error_t sav_parse_machine_integer_info_record(const void *data, size_t data_len, sav_ctx_t *ctx); static readstat_error_t sav_parse_long_string_value_labels_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx); static readstat_error_t sav_parse_long_string_missing_values_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx); +static readstat_error_t sav_read_multiple_response_sets(size_t data_len, sav_ctx_t *ctx); + +static readstat_error_t sav_read_multiple_response_sets(size_t data_len, sav_ctx_t *ctx) { + readstat_error_t retval = READSTAT_OK; + + char *mr_string = readstat_malloc(data_len + 1); + if (mr_string == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + mr_string[data_len] = '\0'; + if (ctx->io->read(mr_string, data_len, ctx->io->io_ctx) < data_len) { + retval = READSTAT_ERROR_PARSE; + goto cleanup; + } + if (mr_string[0] != '$') { + retval = READSTAT_ERROR_BAD_MR_STRING; + goto cleanup; + } + + retval = parse_mr_string(mr_string, &ctx->mr_sets, &ctx->multiple_response_sets_length); + +cleanup: + free(mr_string); + return retval; +} static void sav_tag_missing_double(readstat_value_t *value, sav_ctx_t *ctx) { double fp_value = value->v.double_value; @@ -337,7 +365,7 @@ static readstat_error_t sav_read_variable_record(sav_ctx_t *ctx) { } ctx->var_offset++; ctx->varinfo[ctx->var_index-1]->width++; - return 0; + return retval; } if ((info = readstat_calloc(1, sizeof(spss_varinfo_t))) == NULL) { @@ -703,24 +731,25 @@ static readstat_error_t sav_process_row(unsigned char *buffer, size_t buffer_len goto done; } if (var_info->type == READSTAT_TYPE_STRING) { - if (raw_str_used + 8 <= ctx->raw_string_len) { + // If we're in the last column of a segment, only read 7 bytes + // (Segments contain 255 bytes but have room for 256) + size_t read_len = 8 - (offset == 31); + if (raw_str_used + read_len <= ctx->raw_string_len) { if (raw_str_is_utf8) { /* 
Skip null bytes, see https://github.com/tidyverse/haven/issues/560 */ char c; - for (int i=0; i<8; i++) + for (int i=0; iraw_string[raw_str_used++] = c; } else { - memcpy(ctx->raw_string + raw_str_used, &buffer[data_offset], 8); - raw_str_used += 8; + memcpy(ctx->raw_string + raw_str_used, &buffer[data_offset], read_len); + raw_str_used += read_len; } } if (++offset == col_info->width) { - if (++segment_offset < var_info->n_segments) { - raw_str_used--; - } offset = 0; col++; + segment_offset++; } if (segment_offset == var_info->n_segments) { if (!ctx->variables[var_info->index]->skip) { @@ -1335,6 +1364,14 @@ static readstat_error_t sav_parse_records_pass1(sav_ctx_t *ctx) { retval = sav_parse_machine_integer_info_record(data_buf, data_len, ctx); if (retval != READSTAT_OK) goto cleanup; + } else if (subtype == SAV_RECORD_SUBTYPE_MULTIPLE_RESPONSE_SETS) { + if (ctx->mr_sets != NULL) { + retval = READSTAT_ERROR_BAD_MR_STRING; + goto cleanup; + } + retval = sav_read_multiple_response_sets(data_len, ctx); + if (retval != READSTAT_OK) + goto cleanup; } else { if (io->seek(data_len, READSTAT_SEEK_CUR, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; @@ -1662,6 +1699,48 @@ readstat_error_t readstat_parse_sav(readstat_parser_t *parser, const char *path, metadata.file_label = ctx->file_label; + // Replace short MR names with long names + ck_hash_table_t *var_dict = ck_hash_table_init(ctx->var_index, 8); + for (size_t i = 0; i < ctx->var_index; i++) { + spss_varinfo_t *current_varinfo = ctx->varinfo[i]; + if (current_varinfo != NULL && current_varinfo->name[0] != '\0') { + ck_str_hash_insert(current_varinfo->name, current_varinfo, var_dict); + } + } + for (size_t i = 0; i < ctx->multiple_response_sets_length; i++) { + mr_set_t mr = ctx->mr_sets[i]; + for (size_t j = 0; j < mr.num_subvars; j++) { + char* sv_name_upper = readstat_malloc(strlen(mr.subvariables[j]) + 1); + if (sv_name_upper == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + 
sv_name_upper[strlen(mr.subvariables[j])] = '\0'; + for (int c = 0; mr.subvariables[j][c] != '\0'; c++) { + sv_name_upper[c] = toupper((unsigned char) mr.subvariables[j][c]); + } + spss_varinfo_t *info = (spss_varinfo_t *)ck_str_hash_lookup(sv_name_upper, var_dict); + if (info) { + free(mr.subvariables[j]); + // mr.subvariables[j] = NULL; + if ((mr.subvariables[j] = readstat_malloc(strlen(info->longname) + 1)) == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + // mr.subvariables[j][strlen(info->longname)] = '\0'; + strcpy(mr.subvariables[j], info->longname); + // mr.subvariables[j] = info->longname; + } + free(sv_name_upper); + // sv_name_upper = NULL; + } + } + if (var_dict) + ck_hash_table_free(var_dict); + + metadata.multiple_response_sets_length = ctx->multiple_response_sets_length; + metadata.mr_sets = ctx->mr_sets; + if (ctx->handle.metadata(&metadata, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; @@ -1674,6 +1753,7 @@ readstat_error_t readstat_parse_sav(readstat_parser_t *parser, const char *path, if ((retval = sav_handle_variables(ctx)) != READSTAT_OK) goto cleanup; + if ((retval = sav_handle_fweight(ctx)) != READSTAT_OK) goto cleanup; diff --git a/src/readstat/stata/readstat_dta_read.c b/src/readstat/stata/readstat_dta_read.c index f63ec980..ce07c333 100644 --- a/src/readstat/stata/readstat_dta_read.c +++ b/src/readstat/stata/readstat_dta_read.c @@ -1,3 +1,6 @@ + +#define _XOPEN_SOURCE 700 /* for strnlen */ + #include #include #include @@ -6,16 +9,6 @@ #include #include -#if !defined(_POSIX_VERSION) || _POSIX_VERSION < 200809L -size_t strnlen(const char* s, size_t maxlen) { - const char* end; - end = memchr(s, '\0', maxlen); - if (end == NULL) - return maxlen; - return end - s; -} -#endif - #include "../readstat.h" #include "../readstat_bits.h" #include "../readstat_iconv.h"