Website : rimsha.abasa.com
backdoor
Home
Console
Upload
information
Create File
Create Folder
About
Tools
:
/
var
/
canvas
/
lib
/
Filename :
course_link_validator.rb
back
Copy
# frozen_string_literal: true

#
# Copyright (C) 2014 - present Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.

require "nokogiri"

# Scans the content of a course for links that are unparsable, unreachable,
# or that point at missing, deleted or unpublished items, and collects the
# findings in +issues+.
class CourseLinkValidator
  TAG = "link_validation"

  # retrieves the validation job
  def self.current_progress(course)
    Progress.where(tag: TAG, context_type: "Course", context_id: course.id).last
  end

  # creates a new validation job
  #
  # Returns the existing Progress untouched when it is still pending;
  # otherwise resets (or builds) the Progress and schedules +process+ on it.
  def self.queue_course(course)
    progress = current_progress(course)
    return progress if progress&.pending?

    progress ||= Progress.new(tag: TAG, context: course)
    progress.reset!
    progress.process_job(self, :process, {})
    progress
  end

  # Job entry point: validates the course behind +progress+ and stores the
  # issues on it. Any StandardError is captured as an error report and the
  # progress is marked "failed" instead of propagating.
  def self.process(progress)
    validator = new(progress.context)
    validator.check_course(progress)
    progress.set_results({ issues: validator.issues, completed_at: Time.now.utc, version: 2 })
  rescue => e
    # bind the exception explicitly rather than depending on $ERROR_INFO
    # (which is only defined when the English library has been loaded)
    report_id = Canvas::Errors.capture_exception(:course_link_validation, e)[:error_report]
    progress.workflow_state = "failed"
    progress.set_results({ error_report_id: report_id, completed_at: Time.now.utc })
  end

  attr_accessor :course, :domain_regex, :issues, :visited_urls

  def initialize(course)
    self.course = course
    domain = course.root_account.domain
    # escape the domain so that "." (and any other regex metacharacter) only
    # matches itself; otherwise look-alike hosts are treated as internal links
    self.domain_regex = %r{\w+:?//#{Regexp.escape(domain)}/} if domain
    self.issues = []
    # url => result symbol; lets a url that appears many times be checked once
    self.visited_urls = {}
  end

  # ****************************************************************
  # this is where the magic happens
  #
  # Walks every kind of course content, appending one hash per offending
  # item to +issues+ and bumping the completion of +progress+ as it goes.
  def check_course(progress)
    # Course card image
    if course.image_url.present?
      find_invalid_link(course.image_url) do |link|
        issues << {
          name: I18n.t("Course Card Image"),
          type: :course_card_image,
          content_url: "/courses/#{course.id}/settings",
          invalid_links: [link.merge(image: true)]
        }
      end
      progress.update_completion! 1
    end

    # Syllabus
    find_invalid_links(course.syllabus_body) do |links|
      issues << {
        name: I18n.t(:syllabus, "Course Syllabus"),
        type: :syllabus,
        content_url: "/courses/#{course.id}/assignments/syllabus"
      }.merge(invalid_links: links)
    end
    progress.update_completion! 5

    # Assessment questions
    course.assessment_questions.active.each do |aq|
      next if aq.assessment_question_bank.deleted?

      check_question(aq)
    end
    progress.update_completion! 15

    # Assignments
    course.assignments.active.each do |assignment|
      # quizzes and discussion topics are covered by their own sections below
      next if assignment.quiz || assignment.discussion_topic

      find_invalid_links(assignment.description) do |links|
        issues << {
          name: assignment.title,
          type: :assignment,
          content_url: "/courses/#{course.id}/assignments/#{assignment.id}"
        }.merge(invalid_links: links)
      end
    end
    progress.update_completion! 25

    # Calendar events
    course.calendar_events.active.each do |event|
      find_invalid_links(event.description) do |links|
        issues << {
          name: event.title,
          type: :calendar_event,
          content_url: "/courses/#{course.id}/calendar_events/#{event.id}"
        }.merge(invalid_links: links)
      end
    end
    progress.update_completion! 35

    # Discussion topics
    course.discussion_topics.active.each do |topic|
      find_invalid_links(topic.message) do |links|
        issues << {
          name: topic.title,
          type: :discussion_topic,
          content_url: "/courses/#{course.id}/discussion_topics/#{topic.id}"
        }.merge(invalid_links: links)
      end
    end
    progress.update_completion! 55

    # External URL Module items (almost forgot about these)
    # grouped per module so that each module yields a single issue
    invalid_module_links = {}
    course.context_module_tags.not_deleted.where(content_type: "ExternalUrl").preload(:context_module).each do |ct|
      find_invalid_link(ct.url) do |invalid_link|
        (invalid_module_links[ct.context_module] ||= []) << invalid_link.merge(link_text: ct.title)
      end
    end
    invalid_module_links.each do |mod, links|
      issues << {
        name: mod.name,
        type: :module,
        content_url: "/courses/#{course.id}/modules#module_#{mod.id}"
      }.merge(invalid_links: links)
    end
    progress.update_completion! 65

    # Quizzes
    course.quizzes.active.each do |quiz|
      find_invalid_links(quiz.description) do |links|
        issues << {
          name: quiz.title,
          type: :quiz,
          content_url: "/courses/#{course.id}/quizzes/#{quiz.id}"
        }.merge(invalid_links: links)
      end
      quiz.quiz_questions.active.each do |qq|
        check_question(qq)
      end
    end
    progress.update_completion! 85

    # Wiki pages
    course.wiki_pages.not_deleted.each do |page|
      find_invalid_links(page.body) do |links|
        issues << {
          name: page.title,
          type: :wiki_page,
          content_url: "/courses/#{course.id}/pages/#{page.url}"
        }.merge(invalid_links: links)
      end
    end
    progress.update_completion! 99
  end

  # Assessment/Quiz Questions
  #
  # Gathers the invalid links from every html field of the question and of
  # its answers, and records one issue for the question if any were found.
  def check_question(question)
    links = []
    %i[question_text correct_comments_html incorrect_comments_html neutral_comments_html more_comments_html].each do |field|
      find_invalid_links(question.question_data[field]) do |field_links|
        links.concat(field_links)
      end
    end

    Array(question.question_data[:answers]).each do |answer|
      %i[html comments_html left_html].each do |field|
        find_invalid_links(answer[field]) do |field_links|
          links.concat(field_links)
        end
      end
    end

    return unless links.any?

    hash = { name: question.question_data[:question_name] }.merge(invalid_links: links)
    case question
    when AssessmentQuestion
      hash[:type] = :assessment_question
      hash[:content_url] = "/courses/#{course.id}/question_banks/#{question.assessment_question_bank_id}#question_#{question.id}_question_text"
    when Quizzes::QuizQuestion
      hash[:type] = :quiz_question
      hash[:content_url] = "/courses/#{course.id}/quizzes/#{question.quiz_id}/take?preview=1#question_#{question.id}"
    end
    issues << hash
  end

  # pretty much copied from CanvasImportedHtmlConverter
  #
  # Parses +html+ (nil is treated as an empty document) and checks the
  # href/src/data/value attributes of every node. Yields the array of
  # invalid-link hashes once, and only when at least one was found.
  def find_invalid_links(html)
    links = []
    doc = Nokogiri::HTML5(html || "")
    attrs = %w[href src data value]

    doc.search("*").each do |node|
      attrs.each do |attr|
        url = node[attr]
        next unless url.present?
        # a "value" attribute only counts as a link when the node is named "src"
        next if attr == "value" && node["name"] != "src"

        find_invalid_link(url) do |invalid_link|
          link_text = node.text.presence
          invalid_link[:link_text] = link_text if link_text
          invalid_link[:image] = true if node.name == "img"
          links << invalid_link
        end
      end
    end

    yield links if links.any?
  end

  # yields a hash containing the url and an error type if the url is invalid
  #
  # Results are cached per url in +visited_urls+, so each distinct url is
  # only checked once. "mailto:" urls are never checked.
  def find_invalid_link(url)
    return if url.start_with?("mailto:")

    unless (result = visited_urls[url])
      begin
        if CanvasLinkMigrator.relative_url?(url) || (domain_regex && url.match(domain_regex))
          # internal link: validate against the routes and the linked object
          if valid_route?(url)
            course_match = url.match(%r{/courses/(\d+)})
            result = if course_match && course_match[1] != course.id.to_s
                       :course_mismatch
                     else
                       check_object_status(url)
                     end
          else
            result = :unreachable
          end
        else
          # external link: actually request it
          result = :unreachable unless reachable_url?(url)
        end
      rescue URI::Error
        result = :unparsable
      end
      result ||= :success
      visited_urls[url] = result
    end

    return if result == :success

    invalid_link = { url:, reason: result }
    yield invalid_link
  end

  # checks against the Rails routes to see if the url matches anything
  #
  # Also accepts a path that names an existing file under Rails.public_path,
  # as long as the path contains no ".." segment.
  def valid_route?(url)
    path = URI.parse(url).path
    path = ActionDispatch::Journey::Router::Utils.normalize_path(path)
    @route_set ||= ::Rails.application.routes.set.routes.select { |r| r.verb == "GET" }
    return true if @route_set.any? { |r| r.path.match(path) }

    !Pathname(path).each_filename.include?("..") &&
      Rails.public_path.join(path.delete_prefix("/")).file?
  end

  # makes sure that links to course objects exist and are in a visible state
  #
  # Returns nil when nothing is wrong, otherwise :missing_item, :deleted or
  # :unpublished_item. Any StandardError raised along the way is reported as
  # :missing_item.
  def check_object_status(url, object: nil)
    return :missing_item unless valid_route?(url)
    return :missing_item if url.include?("/test_error")

    object ||= Context.find_asset_by_url(url)
    unless object
      path = URI.parse(url).path
      # without an object, only a path with no trailing item segment (or the
      # syllabus) is accepted
      return :missing_item unless [nil, "syllabus"].include?(path.match(%r{/courses/\d+/\w+/(.+)})&.[](1))
      return :missing_item if path.include?("/media_objects_iframe/")

      return nil
    end

    return :deleted if object.deleted?

    case object
    when Attachment
      return :unpublished_item if object.locked?
    when Quizzes::Quiz
      return :unpublished_item if %w[created unpublished].include?(object.workflow_state)
    else
      return :unpublished_item if object.workflow_state == "unpublished"
    end
    nil
  rescue
    :missing_item
  end

  # whitelisted hosts will never be flagged as unavailable
  #
  # The host list comes from the comma-separated
  # "link_validator_whitelisted_hosts" setting and is memoized per validator.
  def whitelisted?(url)
    @whitelist ||= Setting.get("link_validator_whitelisted_hosts", "").split(",")
    return false if @whitelist.empty?

    host = URI.parse(url).host
    @whitelist.include?(host)
  rescue URI::InvalidURIError
    false
  end

  # ping the url and make sure we get a 200
  #
  # Issues a HEAD request first and falls back to a GET (without reading the
  # body) when HEAD answers 404 or 405. Returns false on any StandardError.
  def reachable_url?(url)
    return true if whitelisted?(url)

    @unavailable_photo_redirect_pattern ||= Regexp.new(Setting.get("unavailable_photo_redirect_pattern", "yimg\\.com/.+/photo_unavailable.png$"))
    redirect_proc = lambda do |response|
      # flickr does a redirect to this file when a photo is deleted/not found;
      # treat this as a broken image instead of following the redirect
      #
      # NOTE: use a dedicated local here. Assigning to `url` inside the lambda
      # would overwrite the method's own `url`, and the GET fallback below
      # would then request the last redirect target instead of the original.
      location = response["Location"]
      raise "photo unavailable" if location&.match?(@unavailable_photo_redirect_pattern)
    end

    begin
      response = CanvasHttp.head(url, { "Accept-Encoding" => "gzip" }, redirect_limit: 9, redirect_spy: redirect_proc)
      if %w[404 405].include?(response.code)
        response = CanvasHttp.get(url, { "Accept-Encoding" => "gzip" }, redirect_limit: 9, redirect_spy: redirect_proc) do
          # don't read the response body
        end
      end

      case response.code
      when /^2/, "401", "403", "429", "503"
        # we accept unauthorized and forbidden codes here because sometimes servers refuse to serve our requests
        # and someone can link to a site that requires authentication anyway - doesn't necessarily make it invalid
        true
      else
        false
      end
    rescue
      false
    end
  end
end