diff --git a/bin/gargantext-upgrade/Main.hs b/bin/gargantext-upgrade/Main.hs index 41629fd38ef55aa25fc757421de2ca6d7c20ce57..17f4f5fab54fc4db1006d35a4d286e2659167714 100644 --- a/bin/gargantext-upgrade/Main.hs +++ b/bin/gargantext-upgrade/Main.hs @@ -79,7 +79,7 @@ main = do refreshIndex :: Cmd'' DevEnv IOException () refreshIndex = do - _ <- execPGSQuery [sql| refresh materialized view context_node_ngrams_view; |] () + _ <- execPGSQuery [sql| REFRESH MATERIALIZED VIEW CONCURRENTLY context_node_ngrams_view; |] () pure () addIndex :: Cmd'' DevEnv IOException Int64 @@ -87,17 +87,24 @@ addIndex = do execPGSQuery query () where query = [sql| - create materialized view if not exists context_node_ngrams_view as - select context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id - from nodes_contexts - join context_node_ngrams - on context_node_ngrams.context_id = nodes_contexts.context_id; - - create index if not exists context_node_ngrams_context_id_ngrams_id_idx on context_node_ngrams(context_id, ngrams_id); - create index if not exists context_node_ngrams_view_context_id_idx on context_node_ngrams_view(context_id); - create index if not exists context_node_ngrams_view_ngrams_id_idx on context_node_ngrams_view(ngrams_id); - create index if not exists context_node_ngrams_view_node_id_idx on context_node_ngrams_view(node_id); - create index if not exists node_stories_ngrams_id_idx on node_stories(ngrams_id); + CREATE MATERIALIZED VIEW IF NOT EXISTS context_node_ngrams_view AS + SELECT DISTINCT context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id + FROM nodes_contexts + JOIN context_node_ngrams + ON context_node_ngrams.context_id = nodes_contexts.context_id; + + CREATE INDEX IF NOT EXISTS context_node_ngrams_context_id_ngrams_id_idx + ON context_node_ngrams(context_id, ngrams_id); + + CREATE INDEX IF NOT EXISTS context_node_ngrams_view_context_id_idx + ON context_node_ngrams_view(context_id); + CREATE INDEX IF NOT EXISTS context_node_ngrams_view_ngrams_id_idx + ON context_node_ngrams_view(ngrams_id); + CREATE INDEX IF NOT EXISTS context_node_ngrams_view_node_id_idx + ON context_node_ngrams_view(node_id); + CREATE UNIQUE INDEX IF NOT EXISTS context_node_ngrams_view_context_ngrams_node_uniq_idx + ON context_node_ngrams_view (context_id, ngrams_id, node_id); + + CREATE INDEX IF NOT EXISTS node_stories_ngrams_id_idx + ON node_stories(ngrams_id); |] - - diff --git a/devops/postgres/schema.sql b/devops/postgres/schema.sql index 6e4c427aa54792833d96619f2b475125c0ac2b05..bc136dd07fa08c764bb000aff54344ea7857344a 100644 --- a/devops/postgres/schema.sql +++ b/devops/postgres/schema.sql @@ -337,14 +337,23 @@ CREATE OR REPLACE function node_pos(int, int) returns bigint --create index node_by_pos on nodes using btree(node_pos(id,typename)); -- Optimization for Ngrams Table View -create materialized view if not exists context_node_ngrams_view as -select context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id -from nodes_contexts -join context_node_ngrams -on context_node_ngrams.context_id = nodes_contexts.context_id; - -create index if not exists context_node_ngrams_context_id_ngrams_id_idx on context_node_ngrams(context_id, ngrams_id); -create index if not exists context_node_ngrams_view_context_id_idx on context_node_ngrams_view(context_id); -create index if not exists context_node_ngrams_view_ngrams_id_idx on context_node_ngrams_view(ngrams_id); -create index if not exists context_node_ngrams_view_node_id_idx on context_node_ngrams_view(node_id); -create index if not exists node_stories_ngrams_id_idx on node_stories(ngrams_id); +CREATE MATERIALIZED VIEW IF NOT EXISTS context_node_ngrams_view AS + SELECT DISTINCT context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id + FROM nodes_contexts + JOIN context_node_ngrams + ON context_node_ngrams.context_id = nodes_contexts.context_id; + +CREATE INDEX IF NOT EXISTS context_node_ngrams_context_id_ngrams_id_idx + ON context_node_ngrams(context_id, ngrams_id); + +CREATE INDEX IF NOT EXISTS context_node_ngrams_view_context_id_idx + ON context_node_ngrams_view(context_id); +CREATE INDEX IF NOT EXISTS context_node_ngrams_view_ngrams_id_idx + ON context_node_ngrams_view(ngrams_id); +CREATE INDEX IF NOT EXISTS context_node_ngrams_view_node_id_idx + ON context_node_ngrams_view(node_id); +CREATE UNIQUE INDEX IF NOT EXISTS context_node_ngrams_view_context_ngrams_node_uniq_idx + ON context_node_ngrams_view (context_id, ngrams_id, node_id); + +CREATE INDEX IF NOT EXISTS node_stories_ngrams_id_idx + ON node_stories(ngrams_id); diff --git a/src/Gargantext/Database/Action/Metrics/NgramsByContext.hs b/src/Gargantext/Database/Action/Metrics/NgramsByContext.hs index 2f64f47481ed107f6fdc9f40971a763d420525bc..aa12d2710e914cd8eebd09147b4d1cea0242d585 100644 --- a/src/Gargantext/Database/Action/Metrics/NgramsByContext.hs +++ b/src/Gargantext/Database/Action/Metrics/NgramsByContext.hs @@ -129,24 +129,53 @@ getOccByNgramsOnlyFast cId lId nt = do query :: DPS.Query query = [sql| - WITH node_context_ids AS - (select context_id, ngrams_id - FROM context_node_ngrams_view + WITH cnnv AS + ( SELECT DISTINCT context_node_ngrams.context_id, + context_node_ngrams.ngrams_id, + nodes_contexts.node_id + FROM nodes_contexts + JOIN context_node_ngrams ON context_node_ngrams.context_id = nodes_contexts.context_id + ), + node_context_ids AS + (SELECT context_id, ngrams_id, terms + FROM cnnv + JOIN ngrams ON cnnv.ngrams_id = ngrams.id WHERE node_id = ? - ), ns AS - (select ngrams_id FROM node_stories - WHERE node_id = ? AND ngrams_type_id = ? - ) - - SELECT ng.terms, - ARRAY ( SELECT DISTINCT context_id - FROM node_context_ids - WHERE ns.ngrams_id = node_context_ids.ngrams_id - ) - AS context_ids - FROM ngrams ng - JOIN ns ON ng.id = ns.ngrams_id + ), + ncids_agg AS + (SELECT ngrams_id, terms, array_agg(DISTINCT context_id) AS agg + FROM node_context_ids + GROUP BY (ngrams_id, terms)), + ns AS + (SELECT ngrams_id, terms + FROM node_stories + JOIN ngrams ON ngrams_id = ngrams.id + WHERE node_id = ? AND ngrams_type_id = ? + ) + + SELECT ns.terms, CASE WHEN agg IS NULL THEN '{}' ELSE agg END + FROM ns + LEFT JOIN ncids_agg ON ns.ngrams_id = ncids_agg.ngrams_id |] + -- query = [sql| + -- WITH node_context_ids AS + -- (select context_id, ngrams_id + -- FROM context_node_ngrams_view + -- WHERE node_id = ? + -- ), ns AS + -- (select ngrams_id FROM node_stories + -- WHERE node_id = ? AND ngrams_type_id = ? + -- ) + + -- SELECT ng.terms, + -- ARRAY ( SELECT DISTINCT context_id + -- FROM node_context_ids + -- WHERE ns.ngrams_id = node_context_ids.ngrams_id + -- ) + -- AS context_ids + -- FROM ngrams ng + -- JOIN ns ON ng.id = ns.ngrams_id + -- |] selectNgramsOccurrencesOnlyByContextUser_withSample :: HasDBid NodeType @@ -403,6 +432,5 @@ refreshNgramsMaterialized :: Cmd err () refreshNgramsMaterialized = void $ execPGSQuery refreshNgramsMaterializedQuery () where refreshNgramsMaterializedQuery :: DPS.Query - refreshNgramsMaterializedQuery = [sql| refresh materialized view context_node_ngrams_view; |] - - + refreshNgramsMaterializedQuery = + [sql| REFRESH MATERIALIZED VIEW CONCURRENTLY context_node_ngrams_view; |]