Hana 9e216da9ef go.mod: add go.mod and move pygments to third_party
After go1.16, go will use module mode by default,
even when the repository is checked out under GOPATH
or in a one-off directory. Add go.mod, go.sum to keep
this repo buildable without opting out of the module
mode.

> go mod init github.com/mmcgrana/gobyexample
> go mod tidy
> go mod vendor

In module mode, the 'vendor' directory is special
and its contents are actively maintained by the
go command. pygments is not a dependency the go command
knows about, so the go command would delete it from the
vendor directory. Move it to the `third_party` directory.

And, vendor the blackfriday package.

Note: the tutorial contents are not affected by the
change in go1.16 because all the examples in this
tutorial ask users to run the go command with the
explicit list of files to be compiled (e.g.
`go run hello-world.go` or `go build command-line-arguments.go`).
When the source list is provided, the go command does
not have to compute the build list and whether it's
running in GOPATH mode or module mode becomes irrelevant.
2021-02-15 16:45:26 -05:00

149 lines
6.5 KiB
Pig

/**
* This script is an example recommender (using made-up data) showing how you might modify item-item links
* by defining similarity relations between items in a dataset and customizing the change in weighting.
* This example creates metadata by using the genre field as the metadata_field. Links between items with
* the same genre have their weight cut in half in order to boost the signals of movies that do not share a genre.
* This technique requires a customization of the standard GetItemItemRecommendations macro.
*/
import 'recommenders.pig';
-- Default values for the script parameters. A value supplied for the same parameter name
-- when the script is launched takes precedence over the default declared here.
-- Purchase records, read by the purchase_input load statement.
%default INPUT_PATH_PURCHASES '../data/retail/purchases.json'
-- Wishlist records, read by the wishlist_input load statement.
%default INPUT_PATH_WISHLIST '../data/retail/wishlists.json'
-- Inventory records (movie titles and genres), read by the inventory_input load statement.
%default INPUT_PATH_INVENTORY '../data/retail/inventory.json'
-- Base directory under which the recommendation outputs are stored.
%default OUTPUT_PATH '../data/retail/out/modify_item_item'
/******** Custom GetItemItemRecommendations *********/
/*
 * Item-item recommendation macro that halves the weight of every link whose
 * two items carry the same metadata value.
 *
 * user_item_signals : relation passed to recsys__BuildItemItemGraph
 * metadata          : relation passed to recsys__AddMetadataToItemItemLinks
 * item_item_recs    : result of recsys__BuildItemItemRecommendationsFromGraph
 */
DEFINE recsys__GetItemItemRecommendations_ModifyCustom(user_item_signals, metadata) RETURNS item_item_recs {

    -- Step 1: build the raw item-item graph and the per-item weights from the signals.
    raw_links, weights_by_item = recsys__BuildItemItemGraph(
        $user_item_signals,
        $LOGISTIC_PARAM,
        $MIN_LINK_WEIGHT,
        $MAX_LINKS_PER_USER
    );

    -- Step 2: combine the metadata with the item-item links.
    -- NOTE this call is the addition relative to the standard macro. The called macro
    -- carries the more detailed explanation.
    links_with_metadata = recsys__AddMetadataToItemItemLinks(
        raw_links,
        $metadata
    );

    /********* Custom Code starts here ********/
    -- Step 3: adjust each link weight according to whether both ends share the same metadata.
    -- Matching metadata halves the weight, anything else keeps the weight as it is.
    -- How much to adjust depends on the domain of the data and on what is expected.
    -- Scaling by a factor is preferred over adding a constant.
    reweighted_links = FOREACH links_with_metadata GENERATE
        item_A,
        item_B,
        (metadata_A == metadata_B ? (weight * 0.5) : weight) AS weight;
    /******** Custom Code stops here *********/

    -- Step 4: as a precaution, replace any weight that is zero or negative with 0.
    clamped_links = FOREACH reweighted_links GENERATE
        item_A,
        item_B,
        (weight <= 0 ? 0 : weight) AS weight;

    -- Step 5: adjust the weights of the graph to improve recommendations.
    final_links = recsys__AdjustItemItemGraphWeight(
        clamped_links,
        weights_by_item,
        $BAYESIAN_PRIOR
    );

    -- Step 6: turn the item-item graph into item-item recommendations.
    $item_item_recs = recsys__BuildItemItemRecommendationsFromGraph(
        final_links,
        $NUM_RECS_PER_ITEM,
        $NUM_RECS_PER_ITEM
    );
};
/******* Load Data **********/
-- Get purchase signals.
-- Reads the JSON file named by the INPUT_PATH_PURCHASES parameter using the schema below.
-- Only user_id and movie_name are referenced when the purchase signals are built in this script.
purchase_input = load '$INPUT_PATH_PURCHASES' using org.apache.pig.piggybank.storage.JsonLoader(
'row_id: int,
movie_id: chararray,
movie_name: chararray,
user_id: chararray,
purchase_price: int');
-- Get wishlist signals.
-- Reads the JSON file named by the INPUT_PATH_WISHLIST parameter. The schema matches the
-- purchase schema except that there is no purchase_price field.
wishlist_input = load '$INPUT_PATH_WISHLIST' using org.apache.pig.piggybank.storage.JsonLoader(
'row_id: int,
movie_id: chararray,
movie_name: chararray,
user_id: chararray');
/******* Convert Data to Signals **********/

-- Purchases get a weight of 1, chosen as the maximum weight for a signal.
purchase_signals = FOREACH purchase_input GENERATE
    user_id    AS user,
    movie_name AS item,
    1.0        AS weight;

-- Wishlist entries get a weight of 0.5 because wishing for an item is a weaker
-- signal than purchasing it.
wishlist_signals = FOREACH wishlist_input GENERATE
    user_id    AS user,
    movie_name AS item,
    0.5        AS weight;

-- Both kinds of signal feed the recommender as a single relation.
user_signals = UNION purchase_signals, wishlist_signals;
/******** Changes for Modifying item-item links ******/
-- Reads the JSON file named by the INPUT_PATH_INVENTORY parameter. Each record carries a
-- movie title and a bag of genres, with one single-field tuple per genre.
inventory_input = load '$INPUT_PATH_INVENTORY' using org.apache.pig.piggybank.storage.JsonLoader(
'movie_title: chararray,
genres: bag{tuple(content:chararray)}');
-- Flatten the genres bag so that every genre of a movie becomes its own row,
-- pairing the genre (as metadata_field) with the movie title (as item).
metadata = FOREACH inventory_input GENERATE
    FLATTEN(genres) AS metadata_field,
    movie_title     AS item;

-- The customized macro has to be written separately from the standard one.
-- NOTE it is defined within this file for clarity.
item_item_recs = recsys__GetItemItemRecommendations_ModifyCustom(
    user_signals,
    metadata
);
/******* No more changes ********/

user_item_recs = recsys__GetUserItemRecommendations(
    user_signals,
    item_item_recs
);
-- Completely unrelated code stuck in the middle.
-- Loads JSON responses, pulls the response map out of each record, projects three
-- typed fields from that map and stores them as pipe-delimited text.
data = LOAD 's3n://my-s3-bucket/path/to/responses' USING org.apache.pig.piggybank.storage.JsonLoader();

responses = FOREACH data GENERATE
    object#'response' AS response: map[];

out = FOREACH responses GENERATE
    response#'id'       AS id: int,
    response#'thread'   AS thread: chararray,
    response#'comments' AS comments: {t: (comment: chararray)};

STORE out INTO 's3n://path/to/output' USING PigStorage('|');
/******* Store recommendations **********/

-- Hadoop refuses to write into an output folder that already exists,
-- so remove both target folders before storing.
rmf $OUTPUT_PATH/item_item_recs;
rmf $OUTPUT_PATH/user_item_recs;

-- Write both recommendation relations under the output path with PigStorage.
STORE item_item_recs INTO '$OUTPUT_PATH/item_item_recs' USING PigStorage();
STORE user_item_recs INTO '$OUTPUT_PATH/user_item_recs' USING PigStorage();

-- Store the item_item_recs into dynamo.
-- NOTE(review): the target path is named unused, which suggests the storer ignores it. Confirm.
STORE item_item_recs INTO '$OUTPUT_PATH/unused-ii-table-data'
    USING com.mortardata.pig.storage.DynamoDBStorage('$II_TABLE', '$AWS_ACCESS_KEY_ID', '$AWS_SECRET_ACCESS_KEY');

-- Store the user_item_recs into dynamo.
STORE user_item_recs INTO '$OUTPUT_PATH/unused-ui-table-data'
    USING com.mortardata.pig.storage.DynamoDBStorage('$UI_TABLE', '$AWS_ACCESS_KEY_ID', '$AWS_SECRET_ACCESS_KEY');