-
Couldn't load subscription status.
- Fork 466
pkp/pkp-lib11758 fix region codes #11899
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: stable-3_4_0
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,15 +14,21 @@ | |
|
|
||
| namespace PKP\migration\upgrade\v3_4_0; | ||
|
|
||
| use Illuminate\Bus\Batch; | ||
| use Illuminate\Database\Schema\Blueprint; | ||
| use Illuminate\Support\Facades\Bus; | ||
| use Illuminate\Support\Facades\DB; | ||
| use Illuminate\Support\Facades\Schema; | ||
| use PKP\core\Core; | ||
| use PKP\install\DowngradeNotSupportedException; | ||
| use PKP\migration\Migration; | ||
| use PKP\migration\upgrade\v3_4_0\jobs\FixRegionCodes; | ||
| use PKP\migration\upgrade\v3_4_0\jobs\CleanTmpChangesForRegionCodesFixes; | ||
| use PKP\migration\upgrade\v3_4_0\jobs\FixRegionCodesDaily; | ||
| use PKP\migration\upgrade\v3_4_0\jobs\FixRegionCodesMonthly; | ||
| use PKP\migration\upgrade\v3_4_0\jobs\PreFixRegionCodesDaily; | ||
| use PKP\migration\upgrade\v3_4_0\jobs\PreFixRegionCodesMonthly; | ||
| use PKP\migration\upgrade\v3_4_0\jobs\RegionMappingTmpClear; | ||
| use PKP\migration\upgrade\v3_4_0\jobs\RegionMappingTmpInsert; | ||
| use Throwable; | ||
|
|
||
| class I8866_DispatchRegionCodesFixingJobs extends Migration | ||
| { | ||
|
|
@@ -31,8 +37,9 @@ class I8866_DispatchRegionCodesFixingJobs extends Migration | |
| */ | ||
| public function up(): void | ||
| { | ||
| if (DB::table('metrics_submission_geo_monthly')->whereNotNull('region')->exists() || | ||
| DB::table('metrics_submission_geo_daily')->whereNotNull('region')->exists()) { | ||
| if (DB::table('metrics_submission_geo_monthly')->where('region', '<>', '')->exists() || | ||
| DB::table('metrics_submission_geo_daily')->where('region', '<>', '')->exists()) { | ||
|
|
||
| // create a temporary table for the FIPS-ISO mapping | ||
| if (!Schema::hasTable('region_mapping_tmp')) { | ||
| Schema::create('region_mapping_tmp', function (Blueprint $table) { | ||
|
|
@@ -42,6 +49,14 @@ public function up(): void | |
| }); | ||
| } | ||
|
|
||
| // temporary change the length of the region columns, becuase we will add prefix 'pkp-' | ||
| Schema::table('metrics_submission_geo_daily', function (Blueprint $table) { | ||
| $table->string('region', 7)->change(); | ||
| }); | ||
| Schema::table('metrics_submission_geo_monthly', function (Blueprint $table) { | ||
| $table->string('region', 7)->change(); | ||
| }); | ||
|
Comment on lines
+53
to
+58
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This sounds risky, if the job never gets completed, the database will be left with an unexpected structure. The same for the indexes below. |
||
|
|
||
| // temporary create index on the column country and region, in order to be able to update the region codes in a reasonable time | ||
| Schema::table('metrics_submission_geo_daily', function (Blueprint $table) { | ||
| $sm = Schema::getConnection()->getDoctrineSchemaManager(); | ||
|
|
@@ -58,35 +73,45 @@ public function up(): void | |
| } | ||
| }); | ||
|
|
||
| $geoDailyIdMax = DB::table('metrics_submission_geo_daily') | ||
| ->max('metrics_submission_geo_daily_id'); | ||
| $geoMonthlyIdMax = DB::table('metrics_submission_geo_monthly') | ||
| ->max('metrics_submission_geo_monthly_id'); | ||
|
|
||
| $chunkSize = 100000; | ||
| $geoDailyChunksNo = ceil($geoDailyIdMax / $chunkSize); | ||
| $geoMonthlyChunksNo = ceil($geoMonthlyIdMax / $chunkSize); | ||
|
|
||
| // read the FIPS to ISO mappings and displatch a job per country | ||
| $mappings = include Core::getBaseDir() . '/' . PKP_LIB_PATH . '/lib/regionMapping.php'; | ||
| $jobs = []; | ||
| foreach (array_keys($mappings) as $country) { | ||
| $jobs[] = new FixRegionCodes($country); | ||
| $jobs[] = new RegionMappingTmpInsert($country); | ||
| for ($i = 0; $i < $geoDailyChunksNo; $i++) { | ||
| $startId = ($i * $chunkSize) + 1; | ||
| $endId = min(($i + 1) * $chunkSize, $geoDailyIdMax); | ||
| $jobs[] = new PreFixRegionCodesDaily($startId, $endId); | ||
| } | ||
| for ($i = 0; $i < $geoMonthlyChunksNo; $i++) { | ||
| $startId = ($i * $chunkSize) + 1; | ||
| $endId = min(($i + 1) * $chunkSize, $geoMonthlyIdMax); | ||
| $jobs[] = new PreFixRegionCodesMonthly($startId, $endId); | ||
| } | ||
| for ($i = 0; $i < $geoDailyChunksNo; $i++) { | ||
| $startId = ($i * $chunkSize) + 1; | ||
| $endId = min(($i + 1) * $chunkSize, $geoDailyIdMax); | ||
| $jobs[] = new FixRegionCodesDaily($startId, $endId); | ||
| } | ||
| for ($i = 0; $i < $geoMonthlyChunksNo; $i++) { | ||
| $startId = ($i * $chunkSize) + 1; | ||
| $endId = min(($i + 1) * $chunkSize, $geoMonthlyIdMax); | ||
| $jobs[] = new FixRegionCodesMonthly($startId, $endId); | ||
| } | ||
| $jobs[] = new RegionMappingTmpClear(); | ||
| } | ||
|
|
||
| Bus::batch($jobs) | ||
| ->then(function (Batch $batch) { | ||
| // drop the temporary index | ||
| Schema::table('metrics_submission_geo_daily', function (Blueprint $table) { | ||
| $sm = Schema::getConnection()->getDoctrineSchemaManager(); | ||
| $indexesFound = $sm->listTableIndexes('metrics_submission_geo_daily'); | ||
| if (array_key_exists('metrics_submission_geo_daily_tmp_index', $indexesFound)) { | ||
| $table->dropIndex(['tmp']); | ||
| } | ||
| }); | ||
| Schema::table('metrics_submission_geo_monthly', function (Blueprint $table) { | ||
| $sm = Schema::getConnection()->getDoctrineSchemaManager(); | ||
| $indexesFound = $sm->listTableIndexes('metrics_submission_geo_monthly'); | ||
| if (array_key_exists('metrics_submission_geo_monthly_tmp_index', $indexesFound)) { | ||
| $table->dropIndex(['tmp']); | ||
| } | ||
| }); | ||
|
|
||
| // drop the temporary table | ||
| if (Schema::hasTable('region_mapping_tmp')) { | ||
| Schema::drop('region_mapping_tmp'); | ||
| } | ||
| $jobs[] = new CleanTmpChangesForRegionCodesFixes(); | ||
| Bus::chain($jobs) | ||
| ->catch(function (Throwable $e) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There should be some error logging here, the |
||
| }) | ||
| ->dispatch(); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,59 @@ | ||||||||||
| <?php | ||||||||||
|
|
||||||||||
| /** | ||||||||||
| * @file classes/migration/upgrade/v3_4_0/jobs/CleanTmpChangesForRegionCodesFixes.php | ||||||||||
| * | ||||||||||
| * Copyright (c) 2022-2025 Simon Fraser University | ||||||||||
| * Copyright (c) 2022-2025 John Willinsky | ||||||||||
|
Comment on lines
+6
to
+7
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For new files we're keeping just the initial year
Suggested change
|
||||||||||
| * Distributed under the GNU GPL v3. For full terms see the file docs/COPYING. | ||||||||||
| * | ||||||||||
| * @class CleanTmpChangesForRegionCodesFixes | ||||||||||
| * | ||||||||||
| * @ingroup jobs | ||||||||||
| * | ||||||||||
| * @brief Drop temporary indexes on metrics Geo tables, drop the table region_mapping_tmp, and return the region column size | ||||||||||
| */ | ||||||||||
|
|
||||||||||
| namespace PKP\migration\upgrade\v3_4_0\jobs; | ||||||||||
|
|
||||||||||
| use Illuminate\Database\Schema\Blueprint; | ||||||||||
| use Illuminate\Support\Facades\Schema; | ||||||||||
| use PKP\jobs\BaseJob; | ||||||||||
|
|
||||||||||
| class CleanTmpChangesForRegionCodesFixes extends BaseJob | ||||||||||
| { | ||||||||||
| /** | ||||||||||
| * Execute the job. | ||||||||||
| */ | ||||||||||
| public function handle(): void | ||||||||||
| { | ||||||||||
| // drop the temporary index | ||||||||||
| Schema::table('metrics_submission_geo_daily', function (Blueprint $table) { | ||||||||||
| $sm = Schema::getConnection()->getDoctrineSchemaManager(); | ||||||||||
| $indexesFound = $sm->listTableIndexes('metrics_submission_geo_daily'); | ||||||||||
| if (array_key_exists('metrics_submission_geo_daily_tmp_index', $indexesFound)) { | ||||||||||
| $table->dropIndex(['tmp']); | ||||||||||
| } | ||||||||||
| }); | ||||||||||
| Schema::table('metrics_submission_geo_monthly', function (Blueprint $table) { | ||||||||||
| $sm = Schema::getConnection()->getDoctrineSchemaManager(); | ||||||||||
| $indexesFound = $sm->listTableIndexes('metrics_submission_geo_monthly'); | ||||||||||
| if (array_key_exists('metrics_submission_geo_monthly_tmp_index', $indexesFound)) { | ||||||||||
| $table->dropIndex(['tmp']); | ||||||||||
| } | ||||||||||
| }); | ||||||||||
|
|
||||||||||
| // return the length of the region columns to varchar(3) | ||||||||||
| Schema::table('metrics_submission_geo_daily', function (Blueprint $table) { | ||||||||||
| $table->string('region', 3)->change(); | ||||||||||
| }); | ||||||||||
| Schema::table('metrics_submission_geo_monthly', function (Blueprint $table) { | ||||||||||
| $table->string('region', 3)->change(); | ||||||||||
| }); | ||||||||||
|
Comment on lines
+47
to
+52
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think jobs shouldn't touch the structure of the database. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jonasraoni, in principle I agree with you -- but I'm not sure I can think of a better alternative. Our options (as I see them) are...
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we know how long it may take to run this script on a really heavy installation? If it's not that long, I think it's ok to keep it as a standard upgrade script. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My concern with these queued jobs and background tasks, especially if they are upgrade related, are with installations hosted on potentially less than ideal hosting solutions where users do not have access to server side logs or shell access. Obviously we do not recommend these sorts of scenarios but given how complex this is, even for us, I can imagine users wondering what's up. As an aside, what does this region code bug cause? If these scripts fail to run, how does one know? Are stats misattributed or not available? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I started the review with the opinion that this should first be turned into an upgrade script, I'll take a look again and check if I have ideas to share (regarding keeping it as a job). For now:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi all, this jobs/upgrade script changes the old region codes into new ones. I do not think that lots of users are using regions -- I think they rarely want to have that detailed usage statistics. The tracking of such usage is also resource intensive, so I would even think that the installations with not so much capacity (resources and personal) would not use them. (I am also wondering why don't we remove city and region level for usage stats -- I believe having country is enough for most of users, no?). I do not fully understand No. 2 from Jonas. I think this belongs somewhere else, how we generally deal with jobs? Because this upgrade is probably not so important for most of the users, and the upgrade to 3.4 lasted in some of Alec's tests hours, we decided to put it into jobs back then. So I believe, these changes make it pretty safe -- that the jobs would not fail (because of the timeout) and can be re-run. So I believe, it would be good to review it all having this in mind -- to be sure that this is so. And, in that case, why not leave it in jobs? Else, the only possibility would be to move it back to the normal process, but because it is so 'unimportant' this maybe does not make soooo much sense now -- but this decision I would need to leave to you... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The metrics will then exist for the region 01 instead for region 10 (not real region codes, just an example). And because we do not do anything with the regions and their metrics, I do not think anybody will notice that difference. Also, not all regions are affected, just some. |
||||||||||
|
|
||||||||||
| // drop the temporary table | ||||||||||
| if (Schema::hasTable('region_mapping_tmp')) { | ||||||||||
| Schema::drop('region_mapping_tmp'); | ||||||||||
| } | ||||||||||
| } | ||||||||||
| } | ||||||||||
This file was deleted.
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,67 @@ | ||||||||||||||||||||||||||||||||||||||||||||
| <?php | ||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||||||||||||||||||
| * @file classes/migration/upgrade/v3_4_0/jobs/FixRegionCodesDaily.php | ||||||||||||||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||||||||||||||
| * Copyright (c) 2022-2025 Simon Fraser University | ||||||||||||||||||||||||||||||||||||||||||||
| * Copyright (c) 2022-2025 John Willinsky | ||||||||||||||||||||||||||||||||||||||||||||
| * Distributed under the GNU GPL v3. For full terms see the file docs/COPYING. | ||||||||||||||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||||||||||||||
| * @class FixRegionCodesDaily | ||||||||||||||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||||||||||||||
| * @ingroup jobs | ||||||||||||||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||||||||||||||
| * @brief Fixes the wrong region codes, using the temporary table of FIPS-ISO mapping. | ||||||||||||||||||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||
| namespace PKP\migration\upgrade\v3_4_0\jobs; | ||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||
| use Illuminate\Support\Facades\DB; | ||||||||||||||||||||||||||||||||||||||||||||
| use PKP\config\Config; | ||||||||||||||||||||||||||||||||||||||||||||
| use PKP\jobs\BaseJob; | ||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||
| class FixRegionCodesDaily extends BaseJob | ||||||||||||||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||||||||||||||
| /** The range of metrics_submission_geo_daily_ids to consider for this update */ | ||||||||||||||||||||||||||||||||||||||||||||
| protected int $startId; | ||||||||||||||||||||||||||||||||||||||||||||
| protected int $endId; | ||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||||||||||||||||||
| * Create a new job instance. | ||||||||||||||||||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||||||||||||||||||
| public function __construct(int $startId, int $endId) | ||||||||||||||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||||||||||||||
| parent::__construct(); | ||||||||||||||||||||||||||||||||||||||||||||
| $this->startId = $startId; | ||||||||||||||||||||||||||||||||||||||||||||
| $this->endId = $endId; | ||||||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+25
to
+37
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As this isn't going to be extended, it can be
Suggested change
|
||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||||||||||||||||||
| * Execute the job. | ||||||||||||||||||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||||||||||||||||||
| public function handle(): void | ||||||||||||||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||||||||||||||
| // update region code from FIPS to ISP, according to the entries in the table region_mapping_tmp | ||||||||||||||||||||||||||||||||||||||||||||
| // Laravel join+update does not work well with PostgreSQL, so use the direct SQLs | ||||||||||||||||||||||||||||||||||||||||||||
| // daily | ||||||||||||||||||||||||||||||||||||||||||||
| if (substr(Config::getVar('database', 'driver'), 0, strlen('postgres')) === 'postgres') { | ||||||||||||||||||||||||||||||||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it should be possible to build this update and the others using the Laravel query builder. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Back then, some years ago, I tried but it was not working with inner join, I will try again... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, I remember about something like that... The if (DB::connection() instanceof PostgresConnection) {
$updateQuery->updateFrom($update);
} else {
$updateQuery->update($update);
} |
||||||||||||||||||||||||||||||||||||||||||||
| DB::statement(" | ||||||||||||||||||||||||||||||||||||||||||||
| UPDATE metrics_submission_geo_daily AS gd | ||||||||||||||||||||||||||||||||||||||||||||
| SET region = rm.iso | ||||||||||||||||||||||||||||||||||||||||||||
| FROM region_mapping_tmp AS rm | ||||||||||||||||||||||||||||||||||||||||||||
| WHERE gd.country = rm.country | ||||||||||||||||||||||||||||||||||||||||||||
| AND gd.region = 'pkp-' || rm.fips AND | ||||||||||||||||||||||||||||||||||||||||||||
| gd.metrics_submission_geo_daily_id >= {$this->startId} | ||||||||||||||||||||||||||||||||||||||||||||
| gd.metrics_submission_geo_daily_id <= {$this->endId} | ||||||||||||||||||||||||||||||||||||||||||||
| "); | ||||||||||||||||||||||||||||||||||||||||||||
| } else { | ||||||||||||||||||||||||||||||||||||||||||||
| DB::statement(" | ||||||||||||||||||||||||||||||||||||||||||||
| UPDATE metrics_submission_geo_daily gd | ||||||||||||||||||||||||||||||||||||||||||||
| INNER JOIN region_mapping_tmp rm ON (rm.country = gd.country AND CONCAT('pkp-', rm.fips) = gd.region) | ||||||||||||||||||||||||||||||||||||||||||||
| SET gd.region = rm.iso | ||||||||||||||||||||||||||||||||||||||||||||
| WHERE gd.metrics_submission_geo_daily_id >= {$this->startId} AND | ||||||||||||||||||||||||||||||||||||||||||||
| gd.metrics_submission_geo_daily_id <= {$this->endId} | ||||||||||||||||||||||||||||||||||||||||||||
| "); | ||||||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GitHub doesn't allow me to comment on the lines below, but adding an index to the temporary table's columns might be helpful to performance.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you think index would be needed even if the table only contains a few entries, lets say max 100 ?